/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66
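
/*
** The interrupt delay registers count in units of 1.024 usecs, which
** is what the conversions above encode, scaling by 1024/1000 with
** rounding; e.g. EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66.
*/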

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate number of transmit and receive descriptors. It
         * must not exceed hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;
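
        /*
        ** Sizing note: the legacy e1000 descriptors above are 16 bytes
        ** each, so with an EM_DBA_ALIGN of 128 (the value assumed here;
        ** see if_em.h) the checks require the counts to be multiples of 8.
        */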

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(hw);


        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}
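
/*
** Shutdown defers to em_suspend() so manageability and hardware
** control are released and wake support is armed the same way
** whether the system is powering off or suspending.
*/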

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  If the ring is busy the driver can queue the request rather
 *  than do an immediate send; that deferral, more than having
 *  multiple TX queues, is the advantage of this path.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}
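
/*
** If the TX lock is contended the frame is simply left on the
** buf_ring; it is drained later by a subsequent call or by the
** transmit cleanup path invoking em_mq_start_locked() with a
** NULL mbuf (as em_poll() does below).
*/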

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, so we make a duplicate
         * in RAR[14] for that eventuality; this assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
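
        /*
        ** A 2K cluster covers the standard 1518-byte frame, a
        ** page-sized (MJUMPAGESIZE) cluster covers up to 4K, and
        ** MJUM9BYTES handles the 9K jumbo case selected above.
        */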
1334
1335         /* Prepare receive descriptors and buffers */
1336         if (em_setup_receive_structures(adapter)) {
1337                 device_printf(dev, "Could not setup receive structures\n");
1338                 em_stop(adapter);
1339                 return;
1340         }
1341         em_initialize_receive_unit(adapter);
1342
1343         /* Use real VLAN Filter support? */
1344         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1345                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1346                         /* Use real VLAN Filter support */
1347                         em_setup_vlan_hw_support(adapter);
1348                 else {
1349                         u32 ctrl;
1350                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1351                         ctrl |= E1000_CTRL_VME;
1352                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1353                 }
1354         }
1355
1356         /* Don't lose promiscuous settings */
1357         em_set_promisc(adapter);
1358
1359         /* Set the interface as ACTIVE */
1360         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1361         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1362
1363         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1364         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1365
1366         /* MSI/X configuration for 82574 */
1367         if (adapter->hw.mac.type == e1000_82574) {
1368                 int tmp;
1369                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1370                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1371                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1372                 /* Set the IVAR - interrupt vector routing. */
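                     /* (adapter->ivars is assembled in em_allocate_msix() below) */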
1373                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1374         }
1375
1376 #ifdef DEVICE_POLLING
1377         /*
1378          * Only enable interrupts if we are not polling; make sure
1379          * they are off otherwise.
1380          */
1381         if (ifp->if_capenable & IFCAP_POLLING)
1382                 em_disable_intr(adapter);
1383         else
1384 #endif /* DEVICE_POLLING */
1385                 em_enable_intr(adapter);
1386
1387         /* On AMT-capable hardware the driver now takes control from firmware */
1388         if (adapter->has_manage && adapter->has_amt)
1389                 em_get_hw_control(adapter);
1390 }
1391
1392 static void
1393 em_init(void *arg)
1394 {
1395         struct adapter *adapter = arg;
1396
1397         EM_CORE_LOCK(adapter);
1398         em_init_locked(adapter);
1399         EM_CORE_UNLOCK(adapter);
1400 }
1401
1402
1403 #ifdef DEVICE_POLLING
1404 /*********************************************************************
1405  *
1406  *  Legacy polling routine: note this only works with a single queue
1407  *
1408  *********************************************************************/
1409 static int
1410 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1411 {
1412         struct adapter *adapter = ifp->if_softc;
1413         struct tx_ring  *txr = adapter->tx_rings;
1414         struct rx_ring  *rxr = adapter->rx_rings;
1415         u32             reg_icr;
1416         int             rx_done;
1417
1418         EM_CORE_LOCK(adapter);
1419         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1420                 EM_CORE_UNLOCK(adapter);
1421                 return (0);
1422         }
1423
1424         if (cmd == POLL_AND_CHECK_STATUS) {
1425                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1426                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1427                         callout_stop(&adapter->timer);
1428                         adapter->hw.mac.get_link_status = 1;
1429                         em_update_link_status(adapter);
1430                         callout_reset(&adapter->timer, hz,
1431                             em_local_timer, adapter);
1432                 }
1433         }
1434         EM_CORE_UNLOCK(adapter);
1435
1436         em_rxeof(rxr, count, &rx_done);
1437
1438         EM_TX_LOCK(txr);
1439         em_txeof(txr);
1440 #ifdef EM_MULTIQUEUE
1441         if (!drbr_empty(ifp, txr->br))
1442                 em_mq_start_locked(ifp, txr, NULL);
1443 #else
1444         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1445                 em_start_locked(ifp, txr);
1446 #endif
1447         EM_TX_UNLOCK(txr);
1448
1449         return (rx_done);
1450 }
1451 #endif /* DEVICE_POLLING */
1452
1453
1454 /*********************************************************************
1455  *
1456  *  Fast Legacy/MSI Combined Interrupt Service routine  
1457  *
1458  *********************************************************************/
1459 static int
1460 em_irq_fast(void *arg)
1461 {
1462         struct adapter  *adapter = arg;
1463         struct ifnet    *ifp;
1464         u32             reg_icr;
1465
1466         ifp = adapter->ifp;
1467
1468         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1469
1470         /* Hot eject?  */
1471         if (reg_icr == 0xffffffff)
1472                 return FILTER_STRAY;
1473
1474         /* Definitely not our interrupt.  */
1475         if (reg_icr == 0x0)
1476                 return FILTER_STRAY;
1477
1478         /*
1479          * Starting with the 82571 chip, bit 31 should be used to
1480          * determine whether the interrupt belongs to us.
1481          */
1482         if (adapter->hw.mac.type >= e1000_82571 &&
1483             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1484                 return FILTER_STRAY;
1485
1486         em_disable_intr(adapter);
1487         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1488
1489         /* Link status change */
1490         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1491                 adapter->hw.mac.get_link_status = 1;
1492                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1493         }
1494
1495         if (reg_icr & E1000_ICR_RXO)
1496                 adapter->rx_overruns++;
1497         return FILTER_HANDLED;
1498 }
1499
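     /*
      * Note the pattern above: the fast filter runs in interrupt context,
      * so it only masks further interrupts and defers the real work; the
      * que task below does the RX/TX cleanup in a taskqueue thread and
      * re-enables interrupts once the backlog is drained.
      */
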
1500 /* Combined RX/TX handler, used by Legacy and MSI */
1501 static void
1502 em_handle_que(void *context, int pending)
1503 {
1504         struct adapter  *adapter = context;
1505         struct ifnet    *ifp = adapter->ifp;
1506         struct tx_ring  *txr = adapter->tx_rings;
1507         struct rx_ring  *rxr = adapter->rx_rings;
1508
1509
1510         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1511                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1512                 EM_TX_LOCK(txr);
1513                 em_txeof(txr);
1514 #ifdef EM_MULTIQUEUE
1515                 if (!drbr_empty(ifp, txr->br))
1516                         em_mq_start_locked(ifp, txr, NULL);
1517 #else
1518                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1519                         em_start_locked(ifp, txr);
1520 #endif
1521                 EM_TX_UNLOCK(txr);
1522                 if (more) {
1523                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1524                         return;
1525                 }
1526         }
1527
1528         em_enable_intr(adapter);
1529         return;
1530 }
1531
1532
1533 /*********************************************************************
1534  *
1535  *  MSIX Interrupt Service Routines
1536  *
1537  **********************************************************************/
1538 static void
1539 em_msix_tx(void *arg)
1540 {
1541         struct tx_ring *txr = arg;
1542         struct adapter *adapter = txr->adapter;
1543         struct ifnet    *ifp = adapter->ifp;
1544
1545         ++txr->tx_irq;
1546         EM_TX_LOCK(txr);
1547         em_txeof(txr);
1548 #ifdef EM_MULTIQUEUE
1549         if (!drbr_empty(ifp, txr->br))
1550                 em_mq_start_locked(ifp, txr, NULL);
1551 #else
1552         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1553                 em_start_locked(ifp, txr);
1554 #endif
1555         /* Reenable this interrupt */
1556         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1557         EM_TX_UNLOCK(txr);
1558         return;
1559 }
1560
1561 /*********************************************************************
1562  *
1563  *  MSIX RX Interrupt Service routine
1564  *
1565  **********************************************************************/
1566
1567 static void
1568 em_msix_rx(void *arg)
1569 {
1570         struct rx_ring  *rxr = arg;
1571         struct adapter  *adapter = rxr->adapter;
1572         bool            more;
1573
1574         ++rxr->rx_irq;
1575         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1576                 return;
1577         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1578         if (more)
1579                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1580         else
1581                 /* Reenable this interrupt */
1582                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1583         return;
1584 }
1585
1586 /*********************************************************************
1587  *
1588  *  MSIX Link Fast Interrupt Service routine
1589  *
1590  **********************************************************************/
1591 static void
1592 em_msix_link(void *arg)
1593 {
1594         struct adapter  *adapter = arg;
1595         u32             reg_icr;
1596
1597         ++adapter->link_irq;
1598         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1599
1600         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1601                 adapter->hw.mac.get_link_status = 1;
1602                 em_handle_link(adapter, 0);
1603         } else
1604                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1605                     EM_MSIX_LINK | E1000_IMS_LSC);
1606         return;
1607 }
1608
1609 static void
1610 em_handle_rx(void *context, int pending)
1611 {
1612         struct rx_ring  *rxr = context;
1613         struct adapter  *adapter = rxr->adapter;
1614         bool            more;
1615
1616         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1617         if (more)
1618                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1619         else
1620                 /* Reenable this interrupt */
1621                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1622 }
1623
1624 static void
1625 em_handle_tx(void *context, int pending)
1626 {
1627         struct tx_ring  *txr = context;
1628         struct adapter  *adapter = txr->adapter;
1629         struct ifnet    *ifp = adapter->ifp;
1630
1631         EM_TX_LOCK(txr);
1632         em_txeof(txr);
1633 #ifdef EM_MULTIQUEUE
1634         if (!drbr_empty(ifp, txr->br))
1635                 em_mq_start_locked(ifp, txr, NULL);
1636 #else
1637         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1638                 em_start_locked(ifp, txr);
1639 #endif
1640         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1641         EM_TX_UNLOCK(txr);
1642 }
1643
1644 static void
1645 em_handle_link(void *context, int pending)
1646 {
1647         struct adapter  *adapter = context;
1648         struct tx_ring  *txr = adapter->tx_rings;
1649         struct ifnet *ifp = adapter->ifp;
1650
1651         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1652                 return;
1653
1654         EM_CORE_LOCK(adapter);
1655         callout_stop(&adapter->timer);
1656         em_update_link_status(adapter);
1657         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1658         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1659             EM_MSIX_LINK | E1000_IMS_LSC);
1660         if (adapter->link_active) {
1661                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1662                         EM_TX_LOCK(txr);
1663 #ifdef EM_MULTIQUEUE
1664                         if (!drbr_empty(ifp, txr->br))
1665                                 em_mq_start_locked(ifp, txr, NULL);
1666 #else
1667                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1668                                 em_start_locked(ifp, txr);
1669 #endif
1670                         EM_TX_UNLOCK(txr);
1671                 }
1672         }
1673         EM_CORE_UNLOCK(adapter);
1674 }
1675
1676
1677 /*********************************************************************
1678  *
1679  *  Media Ioctl callback
1680  *
1681  *  This routine is called whenever the user queries the status of
1682  *  the interface using ifconfig.
1683  *
1684  **********************************************************************/
1685 static void
1686 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1687 {
1688         struct adapter *adapter = ifp->if_softc;
1689         u_char fiber_type = IFM_1000_SX;
1690
1691         INIT_DEBUGOUT("em_media_status: begin");
1692
1693         EM_CORE_LOCK(adapter);
1694         em_update_link_status(adapter);
1695
1696         ifmr->ifm_status = IFM_AVALID;
1697         ifmr->ifm_active = IFM_ETHER;
1698
1699         if (!adapter->link_active) {
1700                 EM_CORE_UNLOCK(adapter);
1701                 return;
1702         }
1703
1704         ifmr->ifm_status |= IFM_ACTIVE;
1705
1706         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1707             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1708                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1709         } else {
1710                 switch (adapter->link_speed) {
1711                 case 10:
1712                         ifmr->ifm_active |= IFM_10_T;
1713                         break;
1714                 case 100:
1715                         ifmr->ifm_active |= IFM_100_TX;
1716                         break;
1717                 case 1000:
1718                         ifmr->ifm_active |= IFM_1000_T;
1719                         break;
1720                 }
1721                 if (adapter->link_duplex == FULL_DUPLEX)
1722                         ifmr->ifm_active |= IFM_FDX;
1723                 else
1724                         ifmr->ifm_active |= IFM_HDX;
1725         }
1726         EM_CORE_UNLOCK(adapter);
1727 }
1728
1729 /*********************************************************************
1730  *
1731  *  Media Ioctl callback
1732  *
1733  *  This routine is called when the user changes speed/duplex using
1734  *  media/mediaopt options with ifconfig.
1735  *
1736  **********************************************************************/
1737 static int
1738 em_media_change(struct ifnet *ifp)
1739 {
1740         struct adapter *adapter = ifp->if_softc;
1741         struct ifmedia  *ifm = &adapter->media;
1742
1743         INIT_DEBUGOUT("em_media_change: begin");
1744
1745         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1746                 return (EINVAL);
1747
1748         EM_CORE_LOCK(adapter);
1749         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1750         case IFM_AUTO:
1751                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1752                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1753                 break;
1754         case IFM_1000_LX:
1755         case IFM_1000_SX:
1756         case IFM_1000_T:
1757                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1758                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1759                 break;
1760         case IFM_100_TX:
1761                 adapter->hw.mac.autoneg = FALSE;
1762                 adapter->hw.phy.autoneg_advertised = 0;
1763                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1764                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1765                 else
1766                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1767                 break;
1768         case IFM_10_T:
1769                 adapter->hw.mac.autoneg = FALSE;
1770                 adapter->hw.phy.autoneg_advertised = 0;
1771                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1772                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1773                 else
1774                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1775                 break;
1776         default:
1777                 device_printf(adapter->dev, "Unsupported media type\n");
1778         }
1779
1780         em_init_locked(adapter);
1781         EM_CORE_UNLOCK(adapter);
1782
1783         return (0);
1784 }
1785
1786 /*********************************************************************
1787  *
1788  *  This routine maps the mbufs to tx descriptors.
1789  *
1790  *  return 0 on success, positive on failure
1791  **********************************************************************/
1792
1793 static int
1794 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1795 {
1796         struct adapter          *adapter = txr->adapter;
1797         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1798         bus_dmamap_t            map;
1799         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1800         struct e1000_tx_desc    *ctxd = NULL;
1801         struct mbuf             *m_head;
1802         struct ether_header     *eh;
1803         struct ip               *ip = NULL;
1804         struct tcphdr           *tp = NULL;
1805         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1806         int                     ip_off, poff;
1807         int                     nsegs, i, j, first, last = 0;
1808         int                     error, do_tso, tso_desc = 0, remap = 1;
1809
1810 retry:
1811         m_head = *m_headp;
1812         txd_upper = txd_lower = txd_used = txd_saved = 0;
1813         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1814         ip_off = poff = 0;
1815
1816         /*
1817          * Intel recommends entire IP/TCP header length reside in a single
1818          * buffer. If multiple descriptors are used to describe the IP and
1819          * TCP header, each descriptor should describe one or more
1820          * complete headers; descriptors referencing only parts of headers
1821          * are not supported. If all layer headers are not coalesced into
1822          * a single buffer, each buffer should not cross a 4KB boundary,
1823          * or be larger than the maximum read request size.
1824          * Controller also requires modifing IP/TCP header to make TSO work
1825          * so we firstly get a writable mbuf chain then coalesce ethernet/
1826          * IP/TCP header into a single buffer to meet the requirement of
1827          * controller. This also simplifies IP/TCP/UDP checksum offloading
1828          * which also has similiar restrictions.
1829          */
1830         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1831                 if (do_tso || (m_head->m_next != NULL && 
1832                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1833                         if (M_WRITABLE(*m_headp) == 0) {
1834                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1835                                 m_freem(*m_headp);
1836                                 if (m_head == NULL) {
1837                                         *m_headp = NULL;
1838                                         return (ENOBUFS);
1839                                 }
1840                                 *m_headp = m_head;
1841                         }
1842                 }
1843                 /*
1844                  * XXX
1845                  * Assume IPv4, we don't have TSO/checksum offload support
1846                  * for IPv6 yet.
1847                  */
1848                 ip_off = sizeof(struct ether_header);
1849                 m_head = m_pullup(m_head, ip_off);
1850                 if (m_head == NULL) {
1851                         *m_headp = NULL;
1852                         return (ENOBUFS);
1853                 }
1854                 eh = mtod(m_head, struct ether_header *);
1855                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1856                         ip_off = sizeof(struct ether_vlan_header);
1857                         m_head = m_pullup(m_head, ip_off);
1858                         if (m_head == NULL) {
1859                                 *m_headp = NULL;
1860                                 return (ENOBUFS);
1861                         }
1862                 }
1863                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1864                 if (m_head == NULL) {
1865                         *m_headp = NULL;
1866                         return (ENOBUFS);
1867                 }
1868                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1869                 poff = ip_off + (ip->ip_hl << 2);
1870                 if (do_tso) {
1871                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1872                         if (m_head == NULL) {
1873                                 *m_headp = NULL;
1874                                 return (ENOBUFS);
1875                         }
1876                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1877                         /*
1878                          * TSO workaround:
1879                          *   pull 4 more bytes of payload into the header mbuf.
1880                          */
1881                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1882                         if (m_head == NULL) {
1883                                 *m_headp = NULL;
1884                                 return (ENOBUFS);
1885                         }
1886                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1887                         ip->ip_len = 0;
1888                         ip->ip_sum = 0;
1889                         /*
1890                          * The TCP pseudo-header checksum must not include
1891                          * the payload length, so the driver recomputes here
1892                          * what the hardware expects to see, in adherence to
1893                          * Microsoft's Large Send specification.
1894                          */
1895                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1896                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1897                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1898                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1899                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1900                         if (m_head == NULL) {
1901                                 *m_headp = NULL;
1902                                 return (ENOBUFS);
1903                         }
1904                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1905                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1906                         if (m_head == NULL) {
1907                                 *m_headp = NULL;
1908                                 return (ENOBUFS);
1909                         }
1910                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1911                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1913                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1914                         if (m_head == NULL) {
1915                                 *m_headp = NULL;
1916                                 return (ENOBUFS);
1917                         }
1918                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1919                 }
1920                 *m_headp = m_head;
1921         }
1922
1923         /*
1924          * Map the packet for DMA
1925          *
1926          * Capture the first descriptor index,
1927          * this descriptor will have the index
1928          * of the EOP, which is the only one that
1929          * now gets a DONE bit writeback.
1930          */
1931         first = txr->next_avail_desc;
1932         tx_buffer = &txr->tx_buffers[first];
1933         tx_buffer_mapped = tx_buffer;
1934         map = tx_buffer->map;
1935
1936         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1937             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1938
1939         /*
1940          * There are two types of errors we can (try) to handle:
1941          * - EFBIG means the mbuf chain was too long and bus_dma ran
1942          *   out of segments.  Defragment the mbuf chain and try again.
1943          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1944          *   at this point in time.  Defer sending and try again later.
1945          * All other errors, in particular EINVAL, are fatal and prevent the
1946          * mbuf chain from ever going through.  Drop it and report error.
1947          */
1948         if (error == EFBIG && remap) {
1949                 struct mbuf *m;
1950
1951                 m = m_defrag(*m_headp, M_DONTWAIT);
1952                 if (m == NULL) {
1953                         adapter->mbuf_alloc_failed++;
1954                         m_freem(*m_headp);
1955                         *m_headp = NULL;
1956                         return (ENOBUFS);
1957                 }
1958                 *m_headp = m;
1959
1960                 /* Try it again, but only once */
1961                 remap = 0;
1962                 goto retry;
1963         } else if (error == ENOMEM) {
1964                 adapter->no_tx_dma_setup++;
1965                 return (error);
1966         } else if (error != 0) {
1967                 adapter->no_tx_dma_setup++;
1968                 m_freem(*m_headp);
1969                 *m_headp = NULL;
1970                 return (error);
1971         }
1972
1973         /*
1974          * TSO Hardware workaround: if this packet is not
1975          * TSO, and is only a single descriptor long, and
1976          * it follows a TSO burst, then we need to add a
1977          * sentinel descriptor to prevent premature writeback.
1978          */
1979         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1980                 if (nsegs == 1)
1981                         tso_desc = TRUE;
1982                 txr->tx_tso = FALSE;
1983         }
1984
1985         if (nsegs > (txr->tx_avail - 2)) {
1986                 txr->no_desc_avail++;
1987                 bus_dmamap_unload(txr->txtag, map);
1988                 return (ENOBUFS);
1989         }
1990         m_head = *m_headp;
1991
1992         /* Do hardware assists */
1993         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1994                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1995                     &txd_upper, &txd_lower);
1996                 /* we need to make a final sentinel transmit desc */
1997                 tso_desc = TRUE;
1998         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1999                 em_transmit_checksum_setup(txr, m_head,
2000                     ip_off, ip, &txd_upper, &txd_lower);
2001
2002         if (m_head->m_flags & M_VLANTAG) {
2003                 /* Put the VLAN id in the descriptor's special field. */
2004                 txd_upper |=
2005                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2006                 /* Tell hardware to add tag */
2007                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2008         }
2009
2010         i = txr->next_avail_desc;
2011
2012         /* Set up our transmit descriptors */
2013         for (j = 0; j < nsegs; j++) {
2014                 bus_size_t seg_len;
2015                 bus_addr_t seg_addr;
2016
2017                 tx_buffer = &txr->tx_buffers[i];
2018                 ctxd = &txr->tx_base[i];
2019                 seg_addr = segs[j].ds_addr;
2020                 seg_len  = segs[j].ds_len;
2021                 /*
2022                 ** TSO Workaround:
2023                 ** If this is the last descriptor, we want to
2024                 ** split it so we have a small final sentinel
2025                 */
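                     /*
                     ** e.g. a 60-byte final segment is emitted as a 56-byte
                     ** descriptor followed by a 4-byte sentinel descriptor.
                     */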
2026                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2027                         seg_len -= 4;
2028                         ctxd->buffer_addr = htole64(seg_addr);
2029                         ctxd->lower.data = htole32(
2030                             adapter->txd_cmd | txd_lower | seg_len);
2031                         ctxd->upper.data =
2032                             htole32(txd_upper);
2033                         if (++i == adapter->num_tx_desc)
2034                                 i = 0;
2035                         /* Now make the sentinel */
2036                         ++txd_used; /* using an extra txd */
2037                         ctxd = &txr->tx_base[i];
2038                         tx_buffer = &txr->tx_buffers[i];
2039                         ctxd->buffer_addr =
2040                             htole64(seg_addr + seg_len);
2041                         ctxd->lower.data = htole32(
2042                             adapter->txd_cmd | txd_lower | 4);
2043                         ctxd->upper.data =
2044                             htole32(txd_upper);
2045                         last = i;
2046                         if (++i == adapter->num_tx_desc)
2047                                 i = 0;
2048                 } else {
2049                         ctxd->buffer_addr = htole64(seg_addr);
2050                         ctxd->lower.data = htole32(
2051                             adapter->txd_cmd | txd_lower | seg_len);
2052                         ctxd->upper.data =
2053                             htole32(txd_upper);
2054                         last = i;
2055                         if (++i == adapter->num_tx_desc)
2056                                 i = 0;
2057                 }
2058                 tx_buffer->m_head = NULL;
2059                 tx_buffer->next_eop = -1;
2060         }
2061
2062         txr->next_avail_desc = i;
2063         txr->tx_avail -= nsegs;
2064         if (tso_desc) /* TSO used an extra for sentinel */
2065                 txr->tx_avail -= txd_used;
2066
2067         tx_buffer->m_head = m_head;
2068         /*
2069         ** Here we swap the map so the last descriptor,
2070         ** which gets the completion interrupt, has the
2071         ** real map, and the first descriptor gets the
2072         ** unused map from this descriptor.
2073         */
2074         tx_buffer_mapped->map = tx_buffer->map;
2075         tx_buffer->map = map;
2076         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2077
2078         /*
2079          * Last Descriptor of Packet
2080          * needs End Of Packet (EOP)
2081          * and Report Status (RS)
2082          */
2083         ctxd->lower.data |=
2084             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2085         /*
2086          * Keep track in the first buffer which
2087          * descriptor will be written back
2088          */
2089         tx_buffer = &txr->tx_buffers[first];
2090         tx_buffer->next_eop = last;
2091         /* Update the watchdog time early and often */
2092         txr->watchdog_time = ticks;
2093
2094         /*
2095          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2096          * that this frame is available to transmit.
2097          */
2098         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2099             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2100         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2101
2102         return (0);
2103 }
2104
2105 static void
2106 em_set_promisc(struct adapter *adapter)
2107 {
2108         struct ifnet    *ifp = adapter->ifp;
2109         u32             reg_rctl;
2110
2111         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2112
2113         if (ifp->if_flags & IFF_PROMISC) {
2114                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2115                 /* Turn this on if you want to see bad packets */
2116                 if (em_debug_sbp)
2117                         reg_rctl |= E1000_RCTL_SBP;
2118                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2119         } else if (ifp->if_flags & IFF_ALLMULTI) {
2120                 reg_rctl |= E1000_RCTL_MPE;
2121                 reg_rctl &= ~E1000_RCTL_UPE;
2122                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2123         }
2124 }
2125
2126 static void
2127 em_disable_promisc(struct adapter *adapter)
2128 {
2129         u32     reg_rctl;
2130
2131         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2132
2133         reg_rctl &=  (~E1000_RCTL_UPE);
2134         reg_rctl &=  (~E1000_RCTL_MPE);
2135         reg_rctl &=  (~E1000_RCTL_SBP);
2136         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2137 }
2138
2139
2140 /*********************************************************************
2141  *  Multicast Update
2142  *
2143  *  This routine is called whenever the multicast address list is updated.
2144  *
2145  **********************************************************************/
2146
2147 static void
2148 em_set_multi(struct adapter *adapter)
2149 {
2150         struct ifnet    *ifp = adapter->ifp;
2151         struct ifmultiaddr *ifma;
2152         u32 reg_rctl = 0;
2153         u8  *mta; /* Multicast array memory */
2154         int mcnt = 0;
2155
2156         IOCTL_DEBUGOUT("em_set_multi: begin");
2157
2158         mta = adapter->mta;
2159         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2160
2161         if (adapter->hw.mac.type == e1000_82542 && 
2162             adapter->hw.revision_id == E1000_REVISION_2) {
2163                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2164                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2165                         e1000_pci_clear_mwi(&adapter->hw);
2166                 reg_rctl |= E1000_RCTL_RST;
2167                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2168                 msec_delay(5);
2169         }
2170
2171 #if __FreeBSD_version < 800000
2172         IF_ADDR_LOCK(ifp);
2173 #else
2174         if_maddr_rlock(ifp);
2175 #endif
2176         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2177                 if (ifma->ifma_addr->sa_family != AF_LINK)
2178                         continue;
2179
2180                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2181                         break;
2182
2183                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2184                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2185                 mcnt++;
2186         }
2187 #if __FreeBSD_version < 800000
2188         IF_ADDR_UNLOCK(ifp);
2189 #else
2190         if_maddr_runlock(ifp);
2191 #endif
2192         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2193                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2194                 reg_rctl |= E1000_RCTL_MPE;
2195                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2196         } else
2197                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2198
2199         if (adapter->hw.mac.type == e1000_82542 && 
2200             adapter->hw.revision_id == E1000_REVISION_2) {
2201                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2202                 reg_rctl &= ~E1000_RCTL_RST;
2203                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2204                 msec_delay(5);
2205                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206                         e1000_pci_set_mwi(&adapter->hw);
2207         }
2208 }
2209
2210
2211 /*********************************************************************
2212  *  Timer routine
2213  *
2214  *  This routine checks for link status and updates statistics.
2215  *
2216  **********************************************************************/
2217
2218 static void
2219 em_local_timer(void *arg)
2220 {
2221         struct adapter  *adapter = arg;
2222         struct ifnet    *ifp = adapter->ifp;
2223         struct tx_ring  *txr = adapter->tx_rings;
2224         struct rx_ring  *rxr = adapter->rx_rings;
2225         u32             trigger;
2226
2227         EM_CORE_LOCK_ASSERT(adapter);
2228
2229         em_update_link_status(adapter);
2230         em_update_stats_counters(adapter);
2231
2232         /* Reset LAA into RAR[0] on 82571 */
2233         if ((adapter->hw.mac.type == e1000_82571) &&
2234             e1000_get_laa_state_82571(&adapter->hw))
2235                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2236
2237         /* Mask to use in the irq trigger */
2238         if (adapter->msix_mem)
2239                 trigger = rxr->ims; /* RX for 82574 */
2240         else
2241                 trigger = E1000_ICS_RXDMT0;
2242
2243         /*
2244         ** Check on the state of the TX queue(s); this
2245         ** can be done without the lock because it's read-only
2246         ** and the HUNG state will be static if set.
2247         */
2248         for (int i = 0; i < adapter->num_queues; i++, txr++) {
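                     /*
                     ** Flow-control pause frames can stall TX legitimately,
                     ** so only declare a hang when none were received.
                     */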
2249                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2250                     (adapter->pause_frames == 0))
2251                         goto hung;
2252                 /* Schedule a TX tasklet if needed */
2253                 if (txr->tx_avail <= EM_MAX_SCATTER)
2254                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2255         }
2256         
2257         adapter->pause_frames = 0;
2258         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2259 #ifndef DEVICE_POLLING
2260         /* Trigger an RX interrupt to guarantee mbuf refresh */
2261         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2262 #endif
2263         return;
2264 hung:
2265         /* Looks like we're hung */
2266         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2267         device_printf(adapter->dev,
2268             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2269             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2270             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2271         device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2272             "Next TX to Clean = %d\n",
2273             txr->me, txr->tx_avail, txr->next_to_clean);
2274         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2275         adapter->watchdog_events++;
2276         adapter->pause_frames = 0;
2277         em_init_locked(adapter);
2278 }
2279
2280
2281 static void
2282 em_update_link_status(struct adapter *adapter)
2283 {
2284         struct e1000_hw *hw = &adapter->hw;
2285         struct ifnet *ifp = adapter->ifp;
2286         device_t dev = adapter->dev;
2287         struct tx_ring *txr = adapter->tx_rings;
2288         u32 link_check = 0;
2289
2290         /* Get the cached link value or read phy for real */
2291         switch (hw->phy.media_type) {
2292         case e1000_media_type_copper:
2293                 if (hw->mac.get_link_status) {
2294                         /* Do the work to read phy */
2295                         e1000_check_for_link(hw);
2296                         link_check = !hw->mac.get_link_status;
2297                         if (link_check) /* ESB2 fix */
2298                                 e1000_cfg_on_link_up(hw);
2299                 } else
2300                         link_check = TRUE;
2301                 break;
2302         case e1000_media_type_fiber:
2303                 e1000_check_for_link(hw);
2304                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2305                                  E1000_STATUS_LU);
2306                 break;
2307         case e1000_media_type_internal_serdes:
2308                 e1000_check_for_link(hw);
2309                 link_check = adapter->hw.mac.serdes_has_link;
2310                 break;
2311         default:
2312         case e1000_media_type_unknown:
2313                 break;
2314         }
2315
2316         /* Now check for a transition */
2317         if (link_check && (adapter->link_active == 0)) {
2318                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2319                     &adapter->link_duplex);
2320                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2321                 if ((adapter->link_speed != SPEED_1000) &&
2322                     ((hw->mac.type == e1000_82571) ||
2323                     (hw->mac.type == e1000_82572))) {
2324                         int tarc0;
2325                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2326                         tarc0 &= ~SPEED_MODE_BIT;
2327                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2328                 }
2329                 if (bootverbose)
2330                         device_printf(dev, "Link is up %d Mbps %s\n",
2331                             adapter->link_speed,
2332                             ((adapter->link_duplex == FULL_DUPLEX) ?
2333                             "Full Duplex" : "Half Duplex"));
2334                 adapter->link_active = 1;
2335                 adapter->smartspeed = 0;
2336                 ifp->if_baudrate = adapter->link_speed * 1000000;
2337                 if_link_state_change(ifp, LINK_STATE_UP);
2338         } else if (!link_check && (adapter->link_active == 1)) {
2339                 ifp->if_baudrate = adapter->link_speed = 0;
2340                 adapter->link_duplex = 0;
2341                 if (bootverbose)
2342                         device_printf(dev, "Link is Down\n");
2343                 adapter->link_active = 0;
2344                 /* Link down, disable watchdog */
2345                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2346                         txr->queue_status = EM_QUEUE_IDLE;
2347                 if_link_state_change(ifp, LINK_STATE_DOWN);
2348         }
2349 }
2350
2351 /*********************************************************************
2352  *
2353  *  This routine disables all traffic on the adapter by issuing a
2354  *  global reset on the MAC and marking the TX queues idle.
2355  *
2356  *  This routine should always be called with the CORE lock held;
2357  *  it takes each TX lock itself.
2358  **********************************************************************/
2359
2360 static void
2361 em_stop(void *arg)
2362 {
2363         struct adapter  *adapter = arg;
2364         struct ifnet    *ifp = adapter->ifp;
2365         struct tx_ring  *txr = adapter->tx_rings;
2366
2367         EM_CORE_LOCK_ASSERT(adapter);
2368
2369         INIT_DEBUGOUT("em_stop: begin");
2370
2371         em_disable_intr(adapter);
2372         callout_stop(&adapter->timer);
2373
2374         /* Tell the stack that the interface is no longer active */
2375         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2376         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2377
2378         /* Unarm watchdog timer. */
2379         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2380                 EM_TX_LOCK(txr);
2381                 txr->queue_status = EM_QUEUE_IDLE;
2382                 EM_TX_UNLOCK(txr);
2383         }
2384
2385         e1000_reset_hw(&adapter->hw);
2386         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2387
2388         e1000_led_off(&adapter->hw);
2389         e1000_cleanup_led(&adapter->hw);
2390 }
2391
2392
2393 /*********************************************************************
2394  *
2395  *  Determine hardware revision.
2396  *
2397  **********************************************************************/
2398 static void
2399 em_identify_hardware(struct adapter *adapter)
2400 {
2401         device_t dev = adapter->dev;
2402
2403         /* Make sure our PCI config space has the necessary stuff set */
2404         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2405         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2406             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2407                 device_printf(dev, "Memory Access and/or Bus Master bits "
2408                     "were not set!\n");
2409                 adapter->hw.bus.pci_cmd_word |=
2410                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2411                 pci_write_config(dev, PCIR_COMMAND,
2412                     adapter->hw.bus.pci_cmd_word, 2);
2413         }
2414
2415         /* Save off the information about this board */
2416         adapter->hw.vendor_id = pci_get_vendor(dev);
2417         adapter->hw.device_id = pci_get_device(dev);
2418         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2419         adapter->hw.subsystem_vendor_id =
2420             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2421         adapter->hw.subsystem_device_id =
2422             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2423
2424         /* Do Shared Code Init and Setup */
2425         if (e1000_set_mac_type(&adapter->hw)) {
2426                 device_printf(dev, "Setup init failure\n");
2427                 return;
2428         }
2429 }
2430
2431 static int
2432 em_allocate_pci_resources(struct adapter *adapter)
2433 {
2434         device_t        dev = adapter->dev;
2435         int             rid;
2436
2437         rid = PCIR_BAR(0);
2438         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2439             &rid, RF_ACTIVE);
2440         if (adapter->memory == NULL) {
2441                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2442                 return (ENXIO);
2443         }
2444         adapter->osdep.mem_bus_space_tag =
2445             rman_get_bustag(adapter->memory);
2446         adapter->osdep.mem_bus_space_handle =
2447             rman_get_bushandle(adapter->memory);
2448         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2449
2450         /* Default to a single queue */
2451         adapter->num_queues = 1;
2452
2453         /*
2454          * Setup MSI/X or MSI if PCI Express
2455          */
2456         adapter->msix = em_setup_msix(adapter);
2457
2458         adapter->hw.back = &adapter->osdep;
2459
2460         return (0);
2461 }
2462
2463 /*********************************************************************
2464  *
2465  *  Setup the Legacy or MSI Interrupt handler
2466  *
2467  **********************************************************************/
2468 int
2469 em_allocate_legacy(struct adapter *adapter)
2470 {
2471         device_t dev = adapter->dev;
2472         struct tx_ring  *txr = adapter->tx_rings;
2473         int error, rid = 0;
2474
2475         /* Manually turn off all interrupts */
2476         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2477
2478         if (adapter->msix == 1) /* using MSI */
2479                 rid = 1;
2480         /* We allocate a single interrupt resource */
2481         adapter->res = bus_alloc_resource_any(dev,
2482             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2483         if (adapter->res == NULL) {
2484                 device_printf(dev, "Unable to allocate bus resource: "
2485                     "interrupt\n");
2486                 return (ENXIO);
2487         }
2488
2489         /*
2490          * Allocate a fast interrupt and the associated
2491          * deferred processing contexts.
2492          */
2493         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2494         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2495             taskqueue_thread_enqueue, &adapter->tq);
2496         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2497             device_get_nameunit(adapter->dev));
2498         /* Use a TX only tasklet for local timer */
2499         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2500         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2501             taskqueue_thread_enqueue, &txr->tq);
2502         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2503             device_get_nameunit(adapter->dev));
2504         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2505         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2506             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2507                 device_printf(dev, "Failed to register fast interrupt "
2508                             "handler: %d\n", error);
2509                 taskqueue_free(adapter->tq);
2510                 adapter->tq = NULL;
2511                 return (error);
2512         }
2513         
2514         return (0);
2515 }
2516
2517 /*********************************************************************
2518  *
2519  *  Setup the MSIX Interrupt handlers
2520  *   This is not really Multiqueue, rather
2521  *   it's just separate interrupt vectors
2522  *   for TX, RX, and Link.
2523  *
2524  **********************************************************************/
2525 int
2526 em_allocate_msix(struct adapter *adapter)
2527 {
2528         device_t        dev = adapter->dev;
2529         struct          tx_ring *txr = adapter->tx_rings;
2530         struct          rx_ring *rxr = adapter->rx_rings;
2531         int             error, rid, vector = 0;
2532
2533
2534         /* Make sure all interrupts are disabled */
2535         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2536
2537         /* First set up ring resources */
2538         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2539
2540                 /* RX ring */
2541                 rid = vector + 1;
2542
2543                 rxr->res = bus_alloc_resource_any(dev,
2544                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2545                 if (rxr->res == NULL) {
2546                         device_printf(dev,
2547                             "Unable to allocate bus resource: "
2548                             "RX MSIX Interrupt %d\n", i);
2549                         return (ENXIO);
2550                 }
2551                 if ((error = bus_setup_intr(dev, rxr->res,
2552                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2553                     rxr, &rxr->tag)) != 0) {
2554                         device_printf(dev, "Failed to register RX handler");
2555                         return (error);
2556                 }
2557 #if __FreeBSD_version >= 800504
2558                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2559 #endif
2560                 rxr->msix = vector++; /* NOTE increment vector for TX */
2561                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2562                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2563                     taskqueue_thread_enqueue, &rxr->tq);
2564                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2565                     device_get_nameunit(adapter->dev));
2566                 /*
2567                 ** Set the bit to enable interrupt
2568                 ** in E1000_IMS -- bits 20 and 21
2569                 ** are for RX0 and RX1, note this has
2570                 ** NOTHING to do with the MSIX vector
2571                 */
2572                 rxr->ims = 1 << (20 + i);
2573                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
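                     /*
                     ** Each 4-bit IVAR field holds (0x8 | vector): the low
                     ** bits select the MSIX vector, 0x8 marks the entry
                     ** valid. The TX and link fields below pack the same way.
                     */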
2574
2575                 /* TX ring */
2576                 rid = vector + 1;
2577                 txr->res = bus_alloc_resource_any(dev,
2578                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2579                 if (txr->res == NULL) {
2580                         device_printf(dev,
2581                             "Unable to allocate bus resource: "
2582                             "TX MSIX Interrupt %d\n", i);
2583                         return (ENXIO);
2584                 }
2585                 if ((error = bus_setup_intr(dev, txr->res,
2586                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2587                     txr, &txr->tag)) != 0) {
2588                         device_printf(dev, "Failed to register TX handler");
2589                         return (error);
2590                 }
2591 #if __FreeBSD_version >= 800504
2592                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2593 #endif
2594                 txr->msix = vector++; /* Increment vector for next pass */
2595                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2596                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2597                     taskqueue_thread_enqueue, &txr->tq);
2598                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2599                     device_get_nameunit(adapter->dev));
2600                 /*
2601                 ** Set the bit to enable interrupt
2602                 ** in E1000_IMS -- bits 22 and 23
2603                 ** are for TX0 and TX1, note this has
2604                 ** NOTHING to do with the MSIX vector
2605                 */
2606                 txr->ims = 1 << (22 + i);
2607                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2608         }
2609
2610         /* Link interrupt */
2611         ++rid;
2612         adapter->res = bus_alloc_resource_any(dev,
2613             SYS_RES_IRQ, &rid, RF_ACTIVE);
2614         if (!adapter->res) {
2615                 device_printf(dev,"Unable to allocate "
2616                     "bus resource: Link interrupt [%d]\n", rid);
2617                 return (ENXIO);
2618         }
2619         /* Set the link handler function */
2620         error = bus_setup_intr(dev, adapter->res,
2621             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2622             em_msix_link, adapter, &adapter->tag);
2623         if (error) {
2624                 adapter->res = NULL;
2625                 device_printf(dev, "Failed to register LINK handler");
2626                 return (error);
2627         }
2628 #if __FreeBSD_version >= 800504
2629                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2630 #endif
2631         adapter->linkvec = vector;
2632         adapter->ivars |=  (8 | vector) << 16;
2633         adapter->ivars |= 0x80000000;
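             /*
             ** e.g. with the default single queue and vectors rx=0, tx=1,
             ** link=2, ivars works out to 0x800A0908; em_init_locked()
             ** writes this value to the E1000_IVAR register.
             */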
2634
2635         return (0);
2636 }
2637
2638
2639 static void
2640 em_free_pci_resources(struct adapter *adapter)
2641 {
2642         device_t        dev = adapter->dev;
2643         struct tx_ring  *txr;
2644         struct rx_ring  *rxr;
2645         int             rid;
2646
2647
2648         /*
2649         ** Release all the queue interrupt resources:
2650         */
2651         for (int i = 0; i < adapter->num_queues; i++) {
2652                 txr = &adapter->tx_rings[i];
2653                 rxr = &adapter->rx_rings[i];
2654                 /* an early abort? */
2655                 if ((txr == NULL) || (rxr == NULL))
2656                         break;
2657                 rid = txr->msix + 1;
2658                 if (txr->tag != NULL) {
2659                         bus_teardown_intr(dev, txr->res, txr->tag);
2660                         txr->tag = NULL;
2661                 }
2662                 if (txr->res != NULL)
2663                         bus_release_resource(dev, SYS_RES_IRQ,
2664                             rid, txr->res);
2665                 rid = rxr->msix + 1;
2666                 if (rxr->tag != NULL) {
2667                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2668                         rxr->tag = NULL;
2669                 }
2670                 if (rxr->res != NULL)
2671                         bus_release_resource(dev, SYS_RES_IRQ,
2672                             rid, rxr->res);
2673         }
2674
2675         if (adapter->linkvec) /* we are doing MSIX */
2676                 rid = adapter->linkvec + 1;
2677         else
2678                 rid = (adapter->msix != 0) ? 1 : 0;
2679
2680         if (adapter->tag != NULL) {
2681                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2682                 adapter->tag = NULL;
2683         }
2684
2685         if (adapter->res != NULL)
2686                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2687
2688
2689         if (adapter->msix)
2690                 pci_release_msi(dev);
2691
2692         if (adapter->msix_mem != NULL)
2693                 bus_release_resource(dev, SYS_RES_MEMORY,
2694                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2695
2696         if (adapter->memory != NULL)
2697                 bus_release_resource(dev, SYS_RES_MEMORY,
2698                     PCIR_BAR(0), adapter->memory);
2699
2700         if (adapter->flash != NULL)
2701                 bus_release_resource(dev, SYS_RES_MEMORY,
2702                     EM_FLASH, adapter->flash);
2703 }
2704
2705 /*
2706  * Setup MSI or MSI/X
2707  */
2708 static int
2709 em_setup_msix(struct adapter *adapter)
2710 {
2711         device_t dev = adapter->dev;
2712         int val = 0;
2713
2714         /*
2715         ** Setup MSI/X for Hartwell: tests have shown
2716         ** use of two queues to be unstable, and to
2717         ** provide no great gain anyway, so we simply
2718         ** separate the interrupts and use a single queue.
2719         */
2720         if ((adapter->hw.mac.type == e1000_82574) &&
2721             (em_enable_msix == TRUE)) {
2722                 /* Map the MSIX BAR */
2723                 int rid = PCIR_BAR(EM_MSIX_BAR);
2724                 adapter->msix_mem = bus_alloc_resource_any(dev,
2725                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2726                 if (!adapter->msix_mem) {
2727                         /* May not be enabled */
2728                         device_printf(adapter->dev,
2729                             "Unable to map MSIX table \n");
2730                         goto msi;
2731                 }
2732                 val = pci_msix_count(dev); 
2733                 /* We only need 3 vectors */
2734                 if (val > 3)
2735                         val = 3;
2736                 if (val != 3) { /* fewer than 3 vectors available */
2737                         bus_release_resource(dev, SYS_RES_MEMORY,
2738                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2739                         adapter->msix_mem = NULL;
2740                         device_printf(adapter->dev,
2741                             "MSIX: incorrect vectors, using MSI\n");
2742                         goto msi;
2743                 }
2744
2745                 if (pci_alloc_msix(dev, &val) == 0) {
2746                         device_printf(adapter->dev,
2747                             "Using MSIX interrupts "
2748                             "with %d vectors\n", val);
2749                 }
2750
2751                 return (val);
2752         }
2753 msi:
2754         val = pci_msi_count(dev);
2755         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2756                 adapter->msix = 1;
2757                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2758                 return (val);
2759         } 
2760         /* Should only happen due to manual configuration */
2761         device_printf(adapter->dev, "No MSI/MSI-X, using a legacy IRQ\n");
2762         return (0);
2763 }
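
/*
 * Usage sketch (illustrative only, not part of the driver): the attach
 * path is assumed to record the return value, where 0 selects a legacy
 * IRQ, 1 selects MSI, and 3 selects MSI-X with that many vectors:
 *
 *	adapter->msix = em_setup_msix(adapter);
 *	if (adapter->msix > 1)
 *		error = em_allocate_msix(adapter);
 *	else
 *		error = em_allocate_legacy(adapter);
 *
 * em_allocate_msix/em_allocate_legacy are assumed to be the interrupt
 * setup helpers elsewhere in this driver.
 */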
2764
2765
2766 /*********************************************************************
2767  *
2768  *  Initialize the hardware to a configuration
2769  *  as specified by the adapter structure.
2770  *
2771  **********************************************************************/
2772 static void
2773 em_reset(struct adapter *adapter)
2774 {
2775         device_t        dev = adapter->dev;
2776         struct ifnet    *ifp = adapter->ifp;
2777         struct e1000_hw *hw = &adapter->hw;
2778         u16             rx_buffer_size;
2779         u32             pba;
2780
2781         INIT_DEBUGOUT("em_reset: begin");
2782
2783         /* Set up smart power down as default off on newer adapters. */
2784         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2785             hw->mac.type == e1000_82572)) {
2786                 u16 phy_tmp = 0;
2787
2788                 /* Speed up time to link by disabling smart power down. */
2789                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2790                 phy_tmp &= ~IGP02E1000_PM_SPD;
2791                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2792         }
2793
2794         /*
2795          * Packet Buffer Allocation (PBA)
2796          * Writing PBA sets the receive portion of the buffer;
2797          * the remainder is used for the transmit buffer.
2798          */
2799         switch (hw->mac.type) {
2800         /* Total Packet Buffer on these is 48K */
2801         case e1000_82571:
2802         case e1000_82572:
2803         case e1000_80003es2lan:
2804                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2805                 break;
2806         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2807                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2808                 break;
2809         case e1000_82574:
2810         case e1000_82583:
2811                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2812                 break;
2813         case e1000_ich8lan:
2814                 pba = E1000_PBA_8K;
2815                 break;
2816         case e1000_ich9lan:
2817         case e1000_ich10lan:
2818                 /* Boost Receive side for jumbo frames */
2819                 if (adapter->max_frame_size > 4096)
2820                         pba = E1000_PBA_14K;
2821                 else
2822                         pba = E1000_PBA_10K;
2823                 break;
2824         case e1000_pchlan:
2825         case e1000_pch2lan:
2826                 pba = E1000_PBA_26K;
2827                 break;
2828         default:
2829                 if (adapter->max_frame_size > 8192)
2830                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2831                 else
2832                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2833         }
2834         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2835
2836         /*
2837          * These parameters control the automatic generation (Tx) and
2838          * response (Rx) to Ethernet PAUSE frames.
2839          * - High water mark should allow for at least two frames to be
2840          *   received after sending an XOFF.
2841          * - Low water mark works best when it is very near the high water mark.
2842          *   This allows the receiver to restart by sending XON when it has
2843          *   drained a bit. Here we use an arbitrary value of 1500 which will
2844          *   restart after one full frame is pulled from the buffer. There
2845          *   could be several smaller frames in the buffer and if so they will
2846          *   not trigger the XON until their total number reduces the buffer
2847          *   by 1500.
2848          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2849          */
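        /*
         * Worked example (illustrative; assumes E1000_PBA_20K == 0x0014,
         * i.e. a 20 KB receive allocation, and a 1522-byte max frame):
         *
         *	rx_buffer_size = (0x0014 & 0xffff) << 10 = 20480
         *	high_water = 20480 - roundup2(1522, 1024) = 18432
         *	low_water  = 18432 - 1500 = 16932
         */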
2850         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2851         hw->fc.high_water = rx_buffer_size -
2852             roundup2(adapter->max_frame_size, 1024);
2853         hw->fc.low_water = hw->fc.high_water - 1500;
2854
2855         if (adapter->fc) /* locally set flow control value? */
2856                 hw->fc.requested_mode = adapter->fc;
2857         else
2858                 hw->fc.requested_mode = e1000_fc_full;
2859
2860         if (hw->mac.type == e1000_80003es2lan)
2861                 hw->fc.pause_time = 0xFFFF;
2862         else
2863                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2864
2865         hw->fc.send_xon = TRUE;
2866
2867         /* Device specific overrides/settings */
2868         switch (hw->mac.type) {
2869         case e1000_pchlan:
2870                 /* Workaround: no TX flow ctrl for PCH */
2871                 hw->fc.requested_mode = e1000_fc_rx_pause;
2872                 hw->fc.pause_time = 0xFFFF; /* override */
2873                 if (ifp->if_mtu > ETHERMTU) {
2874                         hw->fc.high_water = 0x3500;
2875                         hw->fc.low_water = 0x1500;
2876                 } else {
2877                         hw->fc.high_water = 0x5000;
2878                         hw->fc.low_water = 0x3000;
2879                 }
2880                 hw->fc.refresh_time = 0x1000;
2881                 break;
2882         case e1000_pch2lan:
2883                 hw->fc.high_water = 0x5C20;
2884                 hw->fc.low_water = 0x5048;
2885                 hw->fc.pause_time = 0x0650;
2886                 hw->fc.refresh_time = 0x0400;
2887                 /* Jumbos need adjusted PBA */
2888                 if (ifp->if_mtu > ETHERMTU)
2889                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2890                 else
2891                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2892                 break;
2893         case e1000_ich9lan:
2894         case e1000_ich10lan:
2895                 if (ifp->if_mtu > ETHERMTU) {
2896                         hw->fc.high_water = 0x2800;
2897                         hw->fc.low_water = hw->fc.high_water - 8;
2898                         break;
2899                 } 
2900                 /* else fall thru */
2901         default:
2902                 if (hw->mac.type == e1000_80003es2lan)
2903                         hw->fc.pause_time = 0xFFFF;
2904                 break;
2905         }
2906
2907         /* Issue a global reset */
2908         e1000_reset_hw(hw);
2909         E1000_WRITE_REG(hw, E1000_WUC, 0);
2910         em_disable_aspm(adapter);
2911         /* and a re-init */
2912         if (e1000_init_hw(hw) < 0) {
2913                 device_printf(dev, "Hardware Initialization Failed\n");
2914                 return;
2915         }
2916
2917         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2918         e1000_get_phy_info(hw);
2919         e1000_check_for_link(hw);
2920         return;
2921 }
2922
2923 /*********************************************************************
2924  *
2925  *  Setup networking device structure and register an interface.
2926  *
2927  **********************************************************************/
2928 static int
2929 em_setup_interface(device_t dev, struct adapter *adapter)
2930 {
2931         struct ifnet   *ifp;
2932
2933         INIT_DEBUGOUT("em_setup_interface: begin");
2934
2935         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2936         if (ifp == NULL) {
2937                 device_printf(dev, "cannot allocate ifnet structure\n");
2938                 return (-1);
2939         }
2940         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2941         ifp->if_init = em_init;
2942         ifp->if_softc = adapter;
2943         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2944         ifp->if_ioctl = em_ioctl;
2945 #ifdef EM_MULTIQUEUE
2946         /* Multiqueue stack interface */
2947         ifp->if_transmit = em_mq_start;
2948         ifp->if_qflush = em_qflush;
2949 #else
2950         ifp->if_start = em_start;
2951         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2952         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2953         IFQ_SET_READY(&ifp->if_snd);
2954 #endif  
2955
2956         ether_ifattach(ifp, adapter->hw.mac.addr);
2957
2958         ifp->if_capabilities = ifp->if_capenable = 0;
2959
2960
2961         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2962         ifp->if_capabilities |= IFCAP_TSO4;
2963         /*
2964          * Tell the upper layer(s) we
2965          * support full VLAN capability
2966          */
2967         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2968         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2969                              |  IFCAP_VLAN_HWTSO
2970                              |  IFCAP_VLAN_MTU;
2971         ifp->if_capenable = ifp->if_capabilities;
2972
2973         /*
2974         ** Don't turn this on by default: if vlans are
2975         ** created on another pseudo device (e.g. lagg)
2976         ** then vlan events are not passed through, breaking
2977         ** operation. With HW FILTER off it works. If
2978         ** using vlans directly on the em driver you can
2979         ** enable this and get full hardware tag filtering.
2980         */
2981         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
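        /*
         * Illustrative note: HWFILTER is advertised above but left out of
         * if_capenable, so an administrator is assumed to enable it at
         * runtime when appropriate, e.g. "ifconfig em0 vlanhwfilter".
         */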
2982
2983 #ifdef DEVICE_POLLING
2984         ifp->if_capabilities |= IFCAP_POLLING;
2985 #endif
2986
2987         /* Enable only WOL MAGIC by default */
2988         if (adapter->wol) {
2989                 ifp->if_capabilities |= IFCAP_WOL;
2990                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2991         }
2992                 
2993         /*
2994          * Specify the media types supported by this adapter and register
2995          * callbacks to update media and link information
2996          */
2997         ifmedia_init(&adapter->media, IFM_IMASK,
2998             em_media_change, em_media_status);
2999         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3000             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3001                 u_char fiber_type = IFM_1000_SX;        /* default type */
3002
3003                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3004                             0, NULL);
3005                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3006         } else {
3007                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3008                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3009                             0, NULL);
3010                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3011                             0, NULL);
3012                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3013                             0, NULL);
3014                 if (adapter->hw.phy.type != e1000_phy_ife) {
3015                         ifmedia_add(&adapter->media,
3016                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3017                         ifmedia_add(&adapter->media,
3018                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3019                 }
3020         }
3021         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3022         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3023         return (0);
3024 }
3025
3026
3027 /*
3028  * Manage DMA'able memory.
3029  */
3030 static void
3031 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3032 {
3033         if (error)
3034                 return;
3035         *(bus_addr_t *) arg = segs[0].ds_addr;
3036 }
3037
3038 static int
3039 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3040         struct em_dma_alloc *dma, int mapflags)
3041 {
3042         int error;
3043
3044         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3045                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3046                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3047                                 BUS_SPACE_MAXADDR,      /* highaddr */
3048                                 NULL, NULL,             /* filter, filterarg */
3049                                 size,                   /* maxsize */
3050                                 1,                      /* nsegments */
3051                                 size,                   /* maxsegsize */
3052                                 0,                      /* flags */
3053                                 NULL,                   /* lockfunc */
3054                                 NULL,                   /* lockarg */
3055                                 &dma->dma_tag);
3056         if (error) {
3057                 device_printf(adapter->dev,
3058                     "%s: bus_dma_tag_create failed: %d\n",
3059                     __func__, error);
3060                 goto fail_0;
3061         }
3062
3063         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3064             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3065         if (error) {
3066                 device_printf(adapter->dev,
3067                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3068                     __func__, (uintmax_t)size, error);
3069                 goto fail_2;
3070         }
3071
3072         dma->dma_paddr = 0;
3073         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3074             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3075         if (error || dma->dma_paddr == 0) {
3076                 device_printf(adapter->dev,
3077                     "%s: bus_dmamap_load failed: %d\n",
3078                     __func__, error);
3079                 goto fail_3;
3080         }
3081
3082         return (0);
3083
3084 fail_3:
3085         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3086 fail_2:
3087         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3088         bus_dma_tag_destroy(dma->dma_tag);
3089 fail_0:
3090         dma->dma_map = NULL;
3091         dma->dma_tag = NULL;
3092
3093         return (error);
3094 }
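
/*
 * Usage sketch (illustrative only; "example_dma" and the 4 KB size are
 * hypothetical): allocate a DMA-safe area, use its kernel virtual and
 * bus addresses, then release it with the matching free routine.
 *
 *	struct em_dma_alloc example_dma;
 *
 *	if (em_dma_malloc(adapter, 4096, &example_dma, BUS_DMA_NOWAIT) == 0) {
 *		descs = (struct e1000_tx_desc *)example_dma.dma_vaddr;
 *		bus_addr = example_dma.dma_paddr;	(address given to the NIC)
 *		...
 *		em_dma_free(adapter, &example_dma);
 *	}
 */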
3095
3096 static void
3097 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3098 {
3099         if (dma->dma_tag == NULL)
3100                 return;
3101         if (dma->dma_map != NULL) {
3102                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3103                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3104                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3105                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3106                 dma->dma_map = NULL;
3107         }
3108         bus_dma_tag_destroy(dma->dma_tag);
3109         dma->dma_tag = NULL;
3110 }
3111
3112
3113 /*********************************************************************
3114  *
3115  *  Allocate memory for the transmit and receive rings, and then
3116  *  the descriptors associated with each, called only once at attach.
3117  *
3118  **********************************************************************/
3119 static int
3120 em_allocate_queues(struct adapter *adapter)
3121 {
3122         device_t                dev = adapter->dev;
3123         struct tx_ring          *txr = NULL;
3124         struct rx_ring          *rxr = NULL;
3125         int rsize, tsize, error = E1000_SUCCESS;
3126         int txconf = 0, rxconf = 0;
3127
3128
3129         /* Allocate the TX ring struct memory */
3130         if (!(adapter->tx_rings =
3131             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3132             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3133                 device_printf(dev, "Unable to allocate TX ring memory\n");
3134                 error = ENOMEM;
3135                 goto fail;
3136         }
3137
3138         /* Now allocate the RX */
3139         if (!(adapter->rx_rings =
3140             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3141             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3142                 device_printf(dev, "Unable to allocate RX ring memory\n");
3143                 error = ENOMEM;
3144                 goto rx_fail;
3145         }
3146
3147         tsize = roundup2(adapter->num_tx_desc *
3148             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3149         /*
3150          * Now set up the TX queues, txconf is needed to handle the
3151          * possibility that things fail midcourse and we need to
3152          * undo memory gracefully
3153          */ 
3154         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3155                 /* Set up some basics */
3156                 txr = &adapter->tx_rings[i];
3157                 txr->adapter = adapter;
3158                 txr->me = i;
3159
3160                 /* Initialize the TX lock */
3161                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3162                     device_get_nameunit(dev), txr->me);
3163                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3164
3165                 if (em_dma_malloc(adapter, tsize,
3166                         &txr->txdma, BUS_DMA_NOWAIT)) {
3167                         device_printf(dev,
3168                             "Unable to allocate TX Descriptor memory\n");
3169                         error = ENOMEM;
3170                         goto err_tx_desc;
3171                 }
3172                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3173                 bzero((void *)txr->tx_base, tsize);
3174
3175                 if (em_allocate_transmit_buffers(txr)) {
3176                         device_printf(dev,
3177                             "Critical Failure setting up transmit buffers\n");
3178                         error = ENOMEM;
3179                         goto err_tx_desc;
3180                 }
3181 #if __FreeBSD_version >= 800000
3182                 /* Allocate a buf ring */
3183                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3184                     M_WAITOK, &txr->tx_mtx);
3185 #endif
3186         }
3187
3188         /*
3189          * Next the RX queues...
3190          */ 
3191         rsize = roundup2(adapter->num_rx_desc *
3192             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3193         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3194                 rxr = &adapter->rx_rings[i];
3195                 rxr->adapter = adapter;
3196                 rxr->me = i;
3197
3198                 /* Initialize the RX lock */
3199                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3200                     device_get_nameunit(dev), rxr->me);
3201                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3202
3203                 if (em_dma_malloc(adapter, rsize,
3204                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3205                         device_printf(dev,
3206                             "Unable to allocate RX Descriptor memory\n");
3207                         error = ENOMEM;
3208                         goto err_rx_desc;
3209                 }
3210                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3211                 bzero((void *)rxr->rx_base, rsize);
3212
3213                 /* Allocate receive buffers for the ring */
3214                 if (em_allocate_receive_buffers(rxr)) {
3215                         device_printf(dev,
3216                             "Critical Failure setting up receive buffers\n");
3217                         error = ENOMEM;
3218                         goto err_rx_desc;
3219                 }
3220         }
3221
3222         return (0);
3223
3224 err_rx_desc:
3225         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3226                 em_dma_free(adapter, &rxr->rxdma);
3227 err_tx_desc:
3228         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3229                 em_dma_free(adapter, &txr->txdma);
3230         free(adapter->rx_rings, M_DEVBUF);
3231 rx_fail:
3232 #if __FreeBSD_version >= 800000
3233         if (txr != NULL) buf_ring_free(txr->br, M_DEVBUF);
3234 #endif
3235         free(adapter->tx_rings, M_DEVBUF);
3236 fail:
3237         return (error);
3238 }
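
/*
 * Note on the unwind above (illustrative): txconf and rxconf count the
 * rings that were completely set up, so a failure midway frees only
 * those. With num_queues = 2, for example, a failure in the second RX
 * ring leaves rxconf = 1 and txconf = 2: err_rx_desc frees the first
 * RX ring's descriptor area and err_tx_desc then frees both TX rings'.
 */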
3239
3240
3241 /*********************************************************************
3242  *
3243  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3244  *  the information needed to transmit a packet on the wire. This is
3245  *  called only once at attach; setup is done on every reset.
3246  *
3247  **********************************************************************/
3248 static int
3249 em_allocate_transmit_buffers(struct tx_ring *txr)
3250 {
3251         struct adapter *adapter = txr->adapter;
3252         device_t dev = adapter->dev;
3253         struct em_buffer *txbuf;
3254         int error, i;
3255
3256         /*
3257          * Setup DMA descriptor areas.
3258          */
3259         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3260                                1, 0,                    /* alignment, bounds */
3261                                BUS_SPACE_MAXADDR,       /* lowaddr */
3262                                BUS_SPACE_MAXADDR,       /* highaddr */
3263                                NULL, NULL,              /* filter, filterarg */
3264                                EM_TSO_SIZE,             /* maxsize */
3265                                EM_MAX_SCATTER,          /* nsegments */
3266                                PAGE_SIZE,               /* maxsegsize */
3267                                0,                       /* flags */
3268                                NULL,                    /* lockfunc */
3269                                NULL,                    /* lockfuncarg */
3270                                &txr->txtag))) {
3271                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3272                 goto fail;
3273         }
3274
3275         if (!(txr->tx_buffers =
3276             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3277             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3278                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3279                 error = ENOMEM;
3280                 goto fail;
3281         }
3282
3283         /* Create the descriptor buffer dma maps */
3284         txbuf = txr->tx_buffers;
3285         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3286                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3287                 if (error != 0) {
3288                         device_printf(dev, "Unable to create TX DMA map\n");
3289                         goto fail;
3290                 }
3291         }
3292
3293         return (0);
3294 fail:
3295         /* We free everything; this handles the case where we failed midway */
3296         em_free_transmit_structures(adapter);
3297         return (error);
3298 }
3299
3300 /*********************************************************************
3301  *
3302  *  Initialize a transmit ring.
3303  *
3304  **********************************************************************/
3305 static void
3306 em_setup_transmit_ring(struct tx_ring *txr)
3307 {
3308         struct adapter *adapter = txr->adapter;
3309         struct em_buffer *txbuf;
3310         int i;
3311 #ifdef DEV_NETMAP
3312         struct netmap_adapter *na = NA(adapter->ifp);
3313         struct netmap_slot *slot;
3314 #endif /* DEV_NETMAP */
3315
3316         /* Clear the old descriptor contents */
3317         EM_TX_LOCK(txr);
3318 #ifdef DEV_NETMAP
3319         slot = netmap_reset(na, NR_TX, txr->me, 0);
3320 #endif /* DEV_NETMAP */
3321
3322         bzero((void *)txr->tx_base,
3323               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3324         /* Reset indices */
3325         txr->next_avail_desc = 0;
3326         txr->next_to_clean = 0;
3327
3328         /* Free any existing tx buffers. */
3329         txbuf = txr->tx_buffers;
3330         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3331                 if (txbuf->m_head != NULL) {
3332                         bus_dmamap_sync(txr->txtag, txbuf->map,
3333                             BUS_DMASYNC_POSTWRITE);
3334                         bus_dmamap_unload(txr->txtag, txbuf->map);
3335                         m_freem(txbuf->m_head);
3336                         txbuf->m_head = NULL;
3337                 }
3338 #ifdef DEV_NETMAP
3339                 if (slot) {
3340                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3341                         uint64_t paddr;
3342                         void *addr;
3343
3344                         addr = PNMB(slot + si, &paddr);
3345                         txr->tx_base[i].buffer_addr = htole64(paddr);
3346                         /* reload the map for netmap mode */
3347                         netmap_load_map(txr->txtag, txbuf->map, addr);
3348                 }
3349 #endif /* DEV_NETMAP */
3350
3351                 /* clear the watch index */
3352                 txbuf->next_eop = -1;
3353         }
3354
3355         /* Set number of descriptors available */
3356         txr->tx_avail = adapter->num_tx_desc;
3357         txr->queue_status = EM_QUEUE_IDLE;
3358
3359         /* Clear checksum offload context. */
3360         txr->last_hw_offload = 0;
3361         txr->last_hw_ipcss = 0;
3362         txr->last_hw_ipcso = 0;
3363         txr->last_hw_tucss = 0;
3364         txr->last_hw_tucso = 0;
3365
3366         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3367             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3368         EM_TX_UNLOCK(txr);
3369 }
3370
3371 /*********************************************************************
3372  *
3373  *  Initialize all transmit rings.
3374  *
3375  **********************************************************************/
3376 static void
3377 em_setup_transmit_structures(struct adapter *adapter)
3378 {
3379         struct tx_ring *txr = adapter->tx_rings;
3380
3381         for (int i = 0; i < adapter->num_queues; i++, txr++)
3382                 em_setup_transmit_ring(txr);
3383
3384         return;
3385 }
3386
3387 /*********************************************************************
3388  *
3389  *  Enable transmit unit.
3390  *
3391  **********************************************************************/
3392 static void
3393 em_initialize_transmit_unit(struct adapter *adapter)
3394 {
3395         struct tx_ring  *txr = adapter->tx_rings;
3396         struct e1000_hw *hw = &adapter->hw;
3397         u32     tctl, tarc, tipg = 0;
3398
3399         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3400
3401         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3402                 u64 bus_addr = txr->txdma.dma_paddr;
3403                 /* Base and Len of TX Ring */
3404                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3405                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3406                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3407                     (u32)(bus_addr >> 32));
3408                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3409                     (u32)bus_addr);
3410                 /* Init the HEAD/TAIL indices */
3411                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3412                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3413
3414                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3415                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3416                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3417
3418                 txr->queue_status = EM_QUEUE_IDLE;
3419         }
3420
3421         /* Set the default values for the Tx Inter Packet Gap timer */
3422         switch (adapter->hw.mac.type) {
3423         case e1000_80003es2lan:
3424                 tipg = DEFAULT_82543_TIPG_IPGR1;
3425                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3426                     E1000_TIPG_IPGR2_SHIFT;
3427                 break;
3428         default:
3429                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3430                     (adapter->hw.phy.media_type ==
3431                     e1000_media_type_internal_serdes))
3432                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3433                 else
3434                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3435                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3436                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3437         }
3438
3439         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3440         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3441
3442         if (adapter->hw.mac.type >= e1000_82540)
3443                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3444                     adapter->tx_abs_int_delay.value);
3445
3446         if ((adapter->hw.mac.type == e1000_82571) ||
3447             (adapter->hw.mac.type == e1000_82572)) {
3448                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3449                 tarc |= SPEED_MODE_BIT;
3450                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3451         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3452                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3453                 tarc |= 1;
3454                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3455                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3456                 tarc |= 1;
3457                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3458         }
3459
3460         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3461         if (adapter->tx_int_delay.value > 0)
3462                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3463
3464         /* Program the Transmit Control Register */
3465         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3466         tctl &= ~E1000_TCTL_CT;
3467         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3468                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3469
3470         if (adapter->hw.mac.type >= e1000_82571)
3471                 tctl |= E1000_TCTL_MULR;
3472
3473         /* This write will effectively turn on the transmit unit. */
3474         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3475
3476 }
3477
3478
3479 /*********************************************************************
3480  *
3481  *  Free all transmit rings.
3482  *
3483  **********************************************************************/
3484 static void
3485 em_free_transmit_structures(struct adapter *adapter)
3486 {
3487         struct tx_ring *txr = adapter->tx_rings;
3488
3489         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3490                 EM_TX_LOCK(txr);
3491                 em_free_transmit_buffers(txr);
3492                 em_dma_free(adapter, &txr->txdma);
3493                 EM_TX_UNLOCK(txr);
3494                 EM_TX_LOCK_DESTROY(txr);
3495         }
3496
3497         free(adapter->tx_rings, M_DEVBUF);
3498 }
3499
3500 /*********************************************************************
3501  *
3502  *  Free transmit ring related data structures.
3503  *
3504  **********************************************************************/
3505 static void
3506 em_free_transmit_buffers(struct tx_ring *txr)
3507 {
3508         struct adapter          *adapter = txr->adapter;
3509         struct em_buffer        *txbuf;
3510
3511         INIT_DEBUGOUT("free_transmit_ring: begin");
3512
3513         if (txr->tx_buffers == NULL)
3514                 return;
3515
3516         for (int i = 0; i < adapter->num_tx_desc; i++) {
3517                 txbuf = &txr->tx_buffers[i];
3518                 if (txbuf->m_head != NULL) {
3519                         bus_dmamap_sync(txr->txtag, txbuf->map,
3520                             BUS_DMASYNC_POSTWRITE);
3521                         bus_dmamap_unload(txr->txtag,
3522                             txbuf->map);
3523                         m_freem(txbuf->m_head);
3524                         txbuf->m_head = NULL;
3525                         if (txbuf->map != NULL) {
3526                                 bus_dmamap_destroy(txr->txtag,
3527                                     txbuf->map);
3528                                 txbuf->map = NULL;
3529                         }
3530                 } else if (txbuf->map != NULL) {
3531                         bus_dmamap_unload(txr->txtag,
3532                             txbuf->map);
3533                         bus_dmamap_destroy(txr->txtag,
3534                             txbuf->map);
3535                         txbuf->map = NULL;
3536                 }
3537         }
3538 #if __FreeBSD_version >= 800000
3539         if (txr->br != NULL)
3540                 buf_ring_free(txr->br, M_DEVBUF);
3541 #endif
3542         if (txr->tx_buffers != NULL) {
3543                 free(txr->tx_buffers, M_DEVBUF);
3544                 txr->tx_buffers = NULL;
3545         }
3546         if (txr->txtag != NULL) {
3547                 bus_dma_tag_destroy(txr->txtag);
3548                 txr->txtag = NULL;
3549         }
3550         return;
3551 }
3552
3553
3554 /*********************************************************************
3555  *  The offload context is protocol specific (TCP/UDP) and thus
3556  *  only needs to be set when the protocol changes. A context
3557  *  change can be a performance detriment, however, and might be
3558  *  better just disabled. The reason arises in the way in which
3559  *  the controller supports pipelined requests from the
3560  *  Tx data DMA. Up to four requests can be pipelined, and they may
3561  *  belong to the same packet or to multiple packets. However all
3562  *  requests for one packet are issued before a request is issued
3563  *  for a subsequent packet, and if a request for the next packet
3564  *  requires a context change, that request will be stalled
3565  *  until the previous request completes. This means setting up
3566  *  a new context effectively disables pipelined Tx data DMA,
3567  *  which in turn greatly slows down performance when sending
3568  *  small frames.
3569  **********************************************************************/
3570 static void
3571 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3572     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3573 {
3574         struct adapter                  *adapter = txr->adapter;
3575         struct e1000_context_desc       *TXD = NULL;
3576         struct em_buffer                *tx_buffer;
3577         int                             cur, hdr_len;
3578         u32                             cmd = 0;
3579         u16                             offload = 0;
3580         u8                              ipcso, ipcss, tucso, tucss;
3581
3582         ipcss = ipcso = tucss = tucso = 0;
3583         hdr_len = ip_off + (ip->ip_hl << 2);
3584         cur = txr->next_avail_desc;
3585
3586         /* Setup of IP header checksum. */
3587         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3588                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3589                 offload |= CSUM_IP;
3590                 ipcss = ip_off;
3591                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3592                 /*
3593                  * Start offset for header checksum calculation.
3594                  * End offset for header checksum calculation.
3595                  * Offset of place to put the checksum.
3596                  */
3597                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3598                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3599                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3600                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3601                 cmd |= E1000_TXD_CMD_IP;
3602         }
3603
3604         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3605                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3606                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3607                 offload |= CSUM_TCP;
3608                 tucss = hdr_len;
3609                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3610                 /*
3611                  * Setting up a new checksum offload context for every
3612                  * frame takes a lot of processing time in hardware. It
3613                  * also hurts performance for small frames, so avoid it
3614                  * if the driver can reuse the previously configured
3615                  * checksum offload context.
3616                  */
3617                 if (txr->last_hw_offload == offload) {
3618                         if (offload & CSUM_IP) {
3619                                 if (txr->last_hw_ipcss == ipcss &&
3620                                     txr->last_hw_ipcso == ipcso &&
3621                                     txr->last_hw_tucss == tucss &&
3622                                     txr->last_hw_tucso == tucso)
3623                                         return;
3624                         } else {
3625                                 if (txr->last_hw_tucss == tucss &&
3626                                     txr->last_hw_tucso == tucso)
3627                                         return;
3628                         }
3629                 }
3630                 txr->last_hw_offload = offload;
3631                 txr->last_hw_tucss = tucss;
3632                 txr->last_hw_tucso = tucso;
3633                 /*
3634                  * Start offset for payload checksum calculation.
3635                  * End offset for payload checksum calculation.
3636                  * Offset of place to put the checksum.
3637                  */
3638                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3639                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3640                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3641                 TXD->upper_setup.tcp_fields.tucso = tucso;
3642                 cmd |= E1000_TXD_CMD_TCP;
3643         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3644                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3645                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3646                 tucss = hdr_len;
3647                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3648                 /*
3649                  * Setting up a new checksum offload context for every
3650                  * frame takes a lot of processing time in hardware. It
3651                  * also hurts performance for small frames, so avoid it
3652                  * if the driver can reuse the previously configured
3653                  * checksum offload context.
3654                  */
3655                 if (txr->last_hw_offload == offload) {
3656                         if (offload & CSUM_IP) {
3657                                 if (txr->last_hw_ipcss == ipcss &&
3658                                     txr->last_hw_ipcso == ipcso &&
3659                                     txr->last_hw_tucss == tucss &&
3660                                     txr->last_hw_tucso == tucso)
3661                                         return;
3662                         } else {
3663                                 if (txr->last_hw_tucss == tucss &&
3664                                     txr->last_hw_tucso == tucso)
3665                                         return;
3666                         }
3667                 }
3668                 txr->last_hw_offload = offload;
3669                 txr->last_hw_tucss = tucss;
3670                 txr->last_hw_tucso = tucso;
3671                 /*
3672                  * Start offset for header checksum calculation.
3673                  * End offset for header checksum calculation.
3674                  * Offset of place to put the checksum.
3675                  */
3676                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3677                 TXD->upper_setup.tcp_fields.tucss = tucss;
3678                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3679                 TXD->upper_setup.tcp_fields.tucso = tucso;
3680         }
3681   
3682         if (offload & CSUM_IP) {
3683                 txr->last_hw_ipcss = ipcss;
3684                 txr->last_hw_ipcso = ipcso;
3685         }
3686
3687         TXD->tcp_seg_setup.data = htole32(0);
3688         TXD->cmd_and_length =
3689             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3690         tx_buffer = &txr->tx_buffers[cur];
3691         tx_buffer->m_head = NULL;
3692         tx_buffer->next_eop = -1;
3693
3694         if (++cur == adapter->num_tx_desc)
3695                 cur = 0;
3696
3697         txr->tx_avail--;
3698         txr->next_avail_desc = cur;
3699 }
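
/*
 * Worked example (illustrative): for an untagged IPv4/TCP frame with no
 * IP options, ip_off is 14 (the Ethernet header) and ip->ip_hl is 5, so
 * hdr_len = 14 + 20 = 34, and the context descriptor above carries
 *
 *	ipcss = 14, ipcse = 34, ipcso = 14 + offsetof(struct ip, ip_sum) = 24
 *	tucss = 34, tucse = 0,  tucso = 34 + offsetof(struct tcphdr, th_sum) = 50
 */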
3700
3701
3702 /**********************************************************************
3703  *
3704  *  Setup work for hardware segmentation offload (TSO)
3705  *
3706  **********************************************************************/
3707 static void
3708 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3709     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3710 {
3711         struct adapter                  *adapter = txr->adapter;
3712         struct e1000_context_desc       *TXD;
3713         struct em_buffer                *tx_buffer;
3714         int cur, hdr_len;
3715
3716         /*
3717          * In theory we can use the same TSO context if and only if
3718          * frame is the same type (IP/TCP) and has the same MSS. However,
3719          * checking whether a frame has the same IP/TCP structure is a
3720          * hard thing, so just ignore that and always reestablish a
3721          * new TSO context.
3722          */
3723         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3724         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3725                       E1000_TXD_DTYP_D |        /* Data descr type */
3726                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3727
3728         /* IP and/or TCP header checksum calculation and insertion. */
3729         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3730
3731         cur = txr->next_avail_desc;
3732         tx_buffer = &txr->tx_buffers[cur];
3733         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3734
3735         /*
3736          * Start offset for header checksum calculation.
3737          * End offset for header checksum calculation.
3738          * Offset of place to put the checksum.
3739          */
3740         TXD->lower_setup.ip_fields.ipcss = ip_off;
3741         TXD->lower_setup.ip_fields.ipcse =
3742             htole16(ip_off + (ip->ip_hl << 2) - 1);
3743         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3744         /*
3745          * Start offset for payload checksum calculation.
3746          * End offset for payload checksum calculation.
3747          * Offset of place to put the checksum.
3748          */
3749         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3750         TXD->upper_setup.tcp_fields.tucse = 0;
3751         TXD->upper_setup.tcp_fields.tucso =
3752             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3753         /*
3754          * Payload size per packet w/o any headers.
3755          * Length of all headers up to payload.
3756          */
3757         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3758         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3759
3760         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3761                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3762                                 E1000_TXD_CMD_TSE |     /* TSE context */
3763                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3764                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3765                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3766
3767         tx_buffer->m_head = NULL;
3768         tx_buffer->next_eop = -1;
3769
3770         if (++cur == adapter->num_tx_desc)
3771                 cur = 0;
3772
3773         txr->tx_avail--;
3774         txr->next_avail_desc = cur;
3775         txr->tx_tso = TRUE;
3776 }
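
/*
 * Worked example (illustrative): for a TSO send of an IPv4/TCP frame
 * with no IP or TCP options, ip_off = 14, ip->ip_hl = 5 and tp->th_off
 * = 5, so hdr_len = 14 + 20 + 20 = 54. With m_pkthdr.tso_segsz = 1448
 * the hardware is told mss = 1448 and hdr_len = 54, and cmd_and_length
 * carries the m_pkthdr.len - 54 bytes of TCP payload to be segmented.
 */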
3777
3778
3779 /**********************************************************************
3780  *
3781  *  Examine each tx_buffer in the used queue. If the hardware is done
3782  *  processing the packet then free associated resources. The
3783  *  tx_buffer is put back on the free queue.
3784  *
3785  **********************************************************************/
3786 static void
3787 em_txeof(struct tx_ring *txr)
3788 {
3789         struct adapter  *adapter = txr->adapter;
3790         int first, last, done, processed;
3791         struct em_buffer *tx_buffer;
3792         struct e1000_tx_desc   *tx_desc, *eop_desc;
3793         struct ifnet   *ifp = adapter->ifp;
3794
3795         EM_TX_LOCK_ASSERT(txr);
3796 #ifdef DEV_NETMAP
3797         if (ifp->if_capenable & IFCAP_NETMAP) {
3798                 struct netmap_adapter *na = NA(ifp);
3799
3800                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3801                 EM_TX_UNLOCK(txr);
3802                 EM_CORE_LOCK(adapter);
3803                 selwakeuppri(&na->tx_si, PI_NET);
3804                 EM_CORE_UNLOCK(adapter);
3805                 EM_TX_LOCK(txr);
3806                 return;
3807         }
3808 #endif /* DEV_NETMAP */
3809
3810         /* No work, make sure watchdog is off */
3811         if (txr->tx_avail == adapter->num_tx_desc) {
3812                 txr->queue_status = EM_QUEUE_IDLE;
3813                 return;
3814         }
3815
3816         processed = 0;
3817         first = txr->next_to_clean;
3818         tx_desc = &txr->tx_base[first];
3819         tx_buffer = &txr->tx_buffers[first];
3820         last = tx_buffer->next_eop;
3821         eop_desc = &txr->tx_base[last];
3822
3823         /*
3824          * Get the index of the first descriptor
3825          * AFTER the EOP of the first packet, so
3826          * that we can do the simple comparison
3827          * in the inner while loop.
3828          */
3829         if (++last == adapter->num_tx_desc)
3830                 last = 0;
3831         done = last;
3832
3833         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3834             BUS_DMASYNC_POSTREAD);
3835
3836         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3837                 /* We clean the range of the packet */
3838                 while (first != done) {
3839                         tx_desc->upper.data = 0;
3840                         tx_desc->lower.data = 0;
3841                         tx_desc->buffer_addr = 0;
3842                         ++txr->tx_avail;
3843                         ++processed;
3844
3845                         if (tx_buffer->m_head) {
3846                                 bus_dmamap_sync(txr->txtag,
3847                                     tx_buffer->map,
3848                                     BUS_DMASYNC_POSTWRITE);
3849                                 bus_dmamap_unload(txr->txtag,
3850                                     tx_buffer->map);
3851                                 m_freem(tx_buffer->m_head);
3852                                 tx_buffer->m_head = NULL;
3853                         }
3854                         tx_buffer->next_eop = -1;
3855                         txr->watchdog_time = ticks;
3856
3857                         if (++first == adapter->num_tx_desc)
3858                                 first = 0;
3859
3860                         tx_buffer = &txr->tx_buffers[first];
3861                         tx_desc = &txr->tx_base[first];
3862                 }
3863                 ++ifp->if_opackets;
3864                 /* See if we can continue to the next packet */
3865                 last = tx_buffer->next_eop;
3866                 if (last != -1) {
3867                         eop_desc = &txr->tx_base[last];
3868                         /* Get new done point */
3869                         if (++last == adapter->num_tx_desc) last = 0;
3870                         done = last;
3871                 } else
3872                         break;
3873         }
3874         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3875             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3876
3877         txr->next_to_clean = first;
3878
3879         /*
3880         ** Watchdog calculation: we know there's
3881         ** work outstanding or the first return
3882         ** would have been taken, so nothing processed
3883         ** for too long indicates a hang. The local timer
3884         ** will examine this and do a reset if needed.
3885         */
3886         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3887                 txr->queue_status = EM_QUEUE_HUNG;
3888
3889         /*
3890          * If we have a minimum free, clear IFF_DRV_OACTIVE
3891          * to tell the stack that it is OK to send packets.
3892          * Notice that all writes of OACTIVE happen under the
3893          * TX lock which, with a single queue, guarantees 
3894          * sanity.
3895          */
3896         if (txr->tx_avail >= EM_MAX_SCATTER)
3897                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3898
3899         /* Disable watchdog if all clean */
3900         if (txr->tx_avail == adapter->num_tx_desc) {
3901                 txr->queue_status = EM_QUEUE_IDLE;
3902         } 
3903 }
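
/*
 * Worked example of the index arithmetic above (illustrative): with
 * num_tx_desc = 1024, next_to_clean = 1020 and a packet whose EOP is in
 * slot 2, "done" becomes 3 and the inner loop cleans slots 1020-1023
 * and 0-2 before looking for the next EOP.
 */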
3904
3905
3906 /*********************************************************************
3907  *
3908  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3909  *
3910  **********************************************************************/
3911 static void
3912 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3913 {
3914         struct adapter          *adapter = rxr->adapter;
3915         struct mbuf             *m;
3916         bus_dma_segment_t       segs[1];
3917         struct em_buffer        *rxbuf;
3918         int                     i, j, error, nsegs;
3919         bool                    cleaned = FALSE;
3920
3921         i = j = rxr->next_to_refresh;
3922         /*
3923         ** Get one descriptor beyond
3924         ** our work mark to control
3925         ** the loop.
3926         */
3927         if (++j == adapter->num_rx_desc)
3928                 j = 0;
3929
3930         while (j != limit) {
3931                 rxbuf = &rxr->rx_buffers[i];
3932                 if (rxbuf->m_head == NULL) {
3933                         m = m_getjcl(M_DONTWAIT, MT_DATA,
3934                             M_PKTHDR, adapter->rx_mbuf_sz);
3935                         /*
3936                         ** If we have a temporary resource shortage
3937                         ** that causes a failure, just abort refresh
3938                         ** for now, we will return to this point when
3939                         ** reinvoked from em_rxeof.
3940                         */
3941                         if (m == NULL)
3942                                 goto update;
3943                 } else
3944                         m = rxbuf->m_head;
3945
3946                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3947                 m->m_flags |= M_PKTHDR;
3948                 m->m_data = m->m_ext.ext_buf;
3949
3950                 /* Use bus_dma machinery to setup the memory mapping  */
3951                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3952                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3953                 if (error != 0) {
3954                         printf("Refresh mbufs: hdr dmamap load"
3955                             " failure - %d\n", error);
3956                         m_free(m);
3957                         rxbuf->m_head = NULL;
3958                         goto update;
3959                 }
3960                 rxbuf->m_head = m;
3961                 bus_dmamap_sync(rxr->rxtag,
3962                     rxbuf->map, BUS_DMASYNC_PREREAD);
3963                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3964                 cleaned = TRUE;
3965
3966                 i = j; /* Next is precalculated for us */
3967                 rxr->next_to_refresh = i;
3968                 /* Calculate next controlling index */
3969                 if (++j == adapter->num_rx_desc)
3970                         j = 0;
3971         }
3972 update:
3973         /*
3974         ** Update the tail pointer only if, and
3975         ** only as far as, we have refreshed.
3976         */
3977         if (cleaned)
3978                 E1000_WRITE_REG(&adapter->hw,
3979                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3980
3981         return;
3982 }
3983
3984
3985 /*********************************************************************
3986  *
3987  *  Allocate memory for rx_buffer structures. Since we use one
3988  *  rx_buffer per received packet, the maximum number of rx_buffer's
3989  *  that we'll need is equal to the number of receive descriptors
3990  *  that we've allocated.
3991  *
3992  **********************************************************************/
3993 static int
3994 em_allocate_receive_buffers(struct rx_ring *rxr)
3995 {
3996         struct adapter          *adapter = rxr->adapter;
3997         device_t                dev = adapter->dev;
3998         struct em_buffer        *rxbuf;
3999         int                     error;
4000
4001         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4002             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4003         if (rxr->rx_buffers == NULL) {
4004                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4005                 return (ENOMEM);
4006         }
4007
4008         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4009                                 1, 0,                   /* alignment, bounds */
4010                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4011                                 BUS_SPACE_MAXADDR,      /* highaddr */
4012                                 NULL, NULL,             /* filter, filterarg */
4013                                 MJUM9BYTES,             /* maxsize */
4014                                 1,                      /* nsegments */
4015                                 MJUM9BYTES,             /* maxsegsize */
4016                                 0,                      /* flags */
4017                                 NULL,                   /* lockfunc */
4018                                 NULL,                   /* lockarg */
4019                                 &rxr->rxtag);
4020         if (error) {
4021                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4022                     __func__, error);
4023                 goto fail;
4024         }
4025
4026         /* Create a DMA map for each receive buffer */
4027         for (int i = 0; i < adapter->num_rx_desc; i++) {
4028                 rxbuf = &rxr->rx_buffers[i];
4029                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4030                     &rxbuf->map);
4031                 if (error) {
4032                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4033                             __func__, error);
4034                         goto fail;
4035                 }
4036         }
4037
4038         return (0);
4039
4040 fail:
4041         em_free_receive_structures(adapter);
4042         return (error);
4043 }
4044
4045
4046 /*********************************************************************
4047  *
4048  *  Initialize a receive ring and its buffers.
4049  *
4050  **********************************************************************/
4051 static int
4052 em_setup_receive_ring(struct rx_ring *rxr)
4053 {
4054         struct  adapter         *adapter = rxr->adapter;
4055         struct em_buffer        *rxbuf;
4056         bus_dma_segment_t       seg[1];
4057         int                     rsize, nsegs, error = 0;
4058 #ifdef DEV_NETMAP
4059         struct netmap_adapter *na = NA(adapter->ifp);
4060         struct netmap_slot *slot;
4061 #endif
4062
4063
4064         /* Clear the ring contents */
4065         EM_RX_LOCK(rxr);
4066         rsize = roundup2(adapter->num_rx_desc *
4067             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4068         bzero((void *)rxr->rx_base, rsize);
4069 #ifdef DEV_NETMAP
4070         slot = netmap_reset(na, NR_RX, 0, 0);
4071 #endif
4072
4073         /*
4074         ** Free current RX buffer structs and their mbufs
4075         */
4076         for (int i = 0; i < adapter->num_rx_desc; i++) {
4077                 rxbuf = &rxr->rx_buffers[i];
4078                 if (rxbuf->m_head != NULL) {
4079                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4080                             BUS_DMASYNC_POSTREAD);
4081                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4082                         m_freem(rxbuf->m_head);
4083                         rxbuf->m_head = NULL; /* mark as freed */
4084                 }
4085         }
4086
4087         /* Now replenish the mbufs */
4088         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4089                 rxbuf = &rxr->rx_buffers[j];
4090 #ifdef DEV_NETMAP
4091                 if (slot) {
4092                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4093                         uint64_t paddr;
4094                         void *addr;
4095
4096                         addr = PNMB(slot + si, &paddr);
4097                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4098                         /* Update descriptor */
4099                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4100                         continue;
4101                 }
4102 #endif /* DEV_NETMAP */
4103                 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4104                     M_PKTHDR, adapter->rx_mbuf_sz);
4105                 if (rxbuf->m_head == NULL) {
4106                         error = ENOBUFS;
4107                         goto fail;
4108                 }
4109                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4110                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4111                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4112
4113                 /* Get the memory mapping */
4114                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4115                     rxbuf->map, rxbuf->m_head, seg,
4116                     &nsegs, BUS_DMA_NOWAIT);
4117                 if (error != 0) {
4118                         m_freem(rxbuf->m_head);
4119                         rxbuf->m_head = NULL;
4120                         goto fail;
4121                 }
4122                 bus_dmamap_sync(rxr->rxtag,
4123                     rxbuf->map, BUS_DMASYNC_PREREAD);
4124
4125                 /* Update descriptor */
4126                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4127         }
4128         rxr->next_to_check = 0;
4129         rxr->next_to_refresh = 0;
4130         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4131             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4132
4133 fail:
4134         EM_RX_UNLOCK(rxr);
4135         return (error);
4136 }
4137
4138 /*********************************************************************
4139  *
4140  *  Initialize all receive rings.
4141  *
4142  **********************************************************************/
4143 static int
4144 em_setup_receive_structures(struct adapter *adapter)
4145 {
4146         struct rx_ring *rxr = adapter->rx_rings;
4147         int q;
4148
4149         for (q = 0; q < adapter->num_queues; q++, rxr++)
4150                 if (em_setup_receive_ring(rxr))
4151                         goto fail;
4152
4153         return (0);
4154 fail:
4155         /*
4156          * Free the RX buffers allocated so far; we only handle
4157          * the rings that completed, since the failing ring has
4158          * cleaned up after itself. Ring 'q' failed, so it is the terminus.
4159          */
4160         for (int i = 0; i < q; ++i) {
4161                 rxr = &adapter->rx_rings[i];
4162                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4163                         struct em_buffer *rxbuf;
4164                         rxbuf = &rxr->rx_buffers[n];
4165                         if (rxbuf->m_head != NULL) {
4166                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4167                                   BUS_DMASYNC_POSTREAD);
4168                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4169                                 m_freem(rxbuf->m_head);
4170                                 rxbuf->m_head = NULL;
4171                         }
4172                 }
4173                 rxr->next_to_check = 0;
4174                 rxr->next_to_refresh = 0;
4175         }
4176
4177         return (ENOBUFS);
4178 }
4179
4180 /*********************************************************************
4181  *
4182  *  Free all receive rings.
4183  *
4184  **********************************************************************/
4185 static void
4186 em_free_receive_structures(struct adapter *adapter)
4187 {
4188         struct rx_ring *rxr = adapter->rx_rings;
4189
4190         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4191                 em_free_receive_buffers(rxr);
4192                 /* Free the ring memory as well */
4193                 em_dma_free(adapter, &rxr->rxdma);
4194                 EM_RX_LOCK_DESTROY(rxr);
4195         }
4196
4197         free(adapter->rx_rings, M_DEVBUF);
4198 }
4199
4200
4201 /*********************************************************************
4202  *
4203  *  Free receive ring data structures
4204  *
4205  **********************************************************************/
4206 static void
4207 em_free_receive_buffers(struct rx_ring *rxr)
4208 {
4209         struct adapter          *adapter = rxr->adapter;
4210         struct em_buffer        *rxbuf = NULL;
4211
4212         INIT_DEBUGOUT("free_receive_buffers: begin");
4213
4214         if (rxr->rx_buffers != NULL) {
4215                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4216                         rxbuf = &rxr->rx_buffers[i];
4217                         if (rxbuf->map != NULL) {
4218                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4219                                     BUS_DMASYNC_POSTREAD);
4220                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4221                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4222                         }
4223                         if (rxbuf->m_head != NULL) {
4224                                 m_freem(rxbuf->m_head);
4225                                 rxbuf->m_head = NULL;
4226                         }
4227                 }
4228                 free(rxr->rx_buffers, M_DEVBUF);
4229                 rxr->rx_buffers = NULL;
4230                 rxr->next_to_check = 0;
4231                 rxr->next_to_refresh = 0;
4232         }
4233
4234         if (rxr->rxtag != NULL) {
4235                 bus_dma_tag_destroy(rxr->rxtag);
4236                 rxr->rxtag = NULL;
4237         }
4238
4239         return;
4240 }
4241
4242
4243 /*********************************************************************
4244  *
4245  *  Enable receive unit.
4246  *
4247  **********************************************************************/
4248 #define MAX_INTS_PER_SEC        8000
4249 #define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
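/*
 * Worked example of the macro above: with MAX_INTS_PER_SEC = 8000 and
 * the ITR register counting in 256ns units, DEFAULT_ITR evaluates to
 * 1000000000 / (8000 * 256) = 488 (integer division), an interrupt
 * interval of 488 * 256ns ~= 125us, i.e. roughly 8000 interrupts/sec.
 */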
4250
4251 static void
4252 em_initialize_receive_unit(struct adapter *adapter)
4253 {
4254         struct rx_ring  *rxr = adapter->rx_rings;
4255         struct ifnet    *ifp = adapter->ifp;
4256         struct e1000_hw *hw = &adapter->hw;
4257         u64     bus_addr;
4258         u32     rctl, rxcsum;
4259
4260         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4261
4262         /*
4263          * Make sure receives are disabled while setting
4264          * up the descriptor ring
4265          */
4266         rctl = E1000_READ_REG(hw, E1000_RCTL);
4267         /* 82574/82583: never disable receives once they have been enabled */
4268         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4269                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4270
4271         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4272             adapter->rx_abs_int_delay.value);
4273         /*
4274          * Set the interrupt throttling rate; the ITR register counts
4275          * in 256ns units, so DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns).
4276          */
4277         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4278
4279         /*
4280         ** When using MSIX interrupts we need to throttle
4281         ** using the EITR register (82574 only)
4282         */
4283         if (hw->mac.type == e1000_82574) {
4284                 for (int i = 0; i < 4; i++)
4285                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4286                             DEFAULT_ITR);
4287                 /* Disable accelerated acknowledge */
4288                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4289         }
4290
4291         if (ifp->if_capenable & IFCAP_RXCSUM) {
4292                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4293                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4294                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4295         }
4296
4297         /*
4298         ** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4299         ** long latencies are observed (the Lenovo X60, for example).
4300         ** This change eliminates the problem, but since positive
4301         ** values in RDTR are a known source of problems on other
4302         ** platforms, another solution is being sought.
4303         */
4304         if (hw->mac.type == e1000_82573)
4305                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4306
4307         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4308                 /* Setup the Base and Length of the Rx Descriptor Ring */
4309                 bus_addr = rxr->rxdma.dma_paddr;
4310                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4311                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4312                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4313                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4314                 /* Setup the Head and Tail Descriptor Pointers */
4315                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4316 #ifdef DEV_NETMAP
4317                 /*
4318                  * An init() while a netmap client is active must
4319                  * preserve the rx buffers passed to userspace.
4320                  * In this driver that means we adjust RDT to
4321                  * something other than na->num_rx_desc - 1.
4322                  */
4323                 if (ifp->if_capenable & IFCAP_NETMAP) {
4324                         struct netmap_adapter *na = NA(adapter->ifp);
4325                         struct netmap_kring *kring = &na->rx_rings[i];
4326                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4327
4328                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4329                 } else
4330 #endif /* DEV_NETMAP */
4331                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4332         }
4333
4334         /* Set PTHRESH for improved jumbo performance */
4335         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4336             (adapter->hw.mac.type == e1000_pch2lan) ||
4337             (adapter->hw.mac.type == e1000_ich10lan)) &&
4338             (ifp->if_mtu > ETHERMTU)) {
4339                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4340                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4341         }
4342                 
4343         if (adapter->hw.mac.type == e1000_pch2lan) {
4344                 if (ifp->if_mtu > ETHERMTU)
4345                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4346                 else
4347                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4348         }
4349
4350         /* Setup the Receive Control Register */
4351         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4352         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4353             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4354             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4355
4356         /* Strip the CRC */
4357         rctl |= E1000_RCTL_SECRC;
4358
4359         /* Make sure VLAN Filters are off */
4360         rctl &= ~E1000_RCTL_VFE;
4361         rctl &= ~E1000_RCTL_SBP;
4362
4363         if (adapter->rx_mbuf_sz == MCLBYTES)
4364                 rctl |= E1000_RCTL_SZ_2048;
4365         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4366                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4367         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4368                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
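        /*
         * For example, assuming MJUMPAGESIZE is the common 4K: standard
         * 2K clusters select SZ_2048; 4K clusters select SZ_4096 plus the
         * buffer-size extension bit (BSEX); anything larger (9K jumbo
         * clusters, say) selects SZ_8192 | BSEX, the largest size this
         * chain programs.
         */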
4369
4370         if (ifp->if_mtu > ETHERMTU)
4371                 rctl |= E1000_RCTL_LPE;
4372         else
4373                 rctl &= ~E1000_RCTL_LPE;
4374
4375         /* Write out the settings */
4376         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4377
4378         return;
4379 }
4380
4381
4382 /*********************************************************************
4383  *
4384  *  This routine executes in interrupt context. It replenishes
4385  *  the mbufs in the descriptor ring and passes data that has been
4386  *  DMA'd into host memory up to the stack.
4387  *
4388  *  We loop at most count times if count is > 0, or until the ring
4389  *  is clean if count < 0.
4390  *
4391  *  For polling we also return, via *done, the number of cleaned packets.
4392  *********************************************************************/
4393 static bool
4394 em_rxeof(struct rx_ring *rxr, int count, int *done)
4395 {
4396         struct adapter          *adapter = rxr->adapter;
4397         struct ifnet            *ifp = adapter->ifp;
4398         struct mbuf             *mp, *sendmp;
4399         u8                      status = 0;
4400         u16                     len;
4401         int                     i, processed, rxdone = 0;
4402         bool                    eop;
4403         struct e1000_rx_desc    *cur;
4404
4405         EM_RX_LOCK(rxr);
4406
4407 #ifdef DEV_NETMAP
4408         if (ifp->if_capenable & IFCAP_NETMAP) {
4409                 struct netmap_adapter *na = NA(ifp);
4410
4411                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4412                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4413                 EM_RX_UNLOCK(rxr);
4414                 EM_CORE_LOCK(adapter);
4415                 selwakeuppri(&na->rx_si, PI_NET);
4416                 EM_CORE_UNLOCK(adapter);
4417                 return (FALSE);
4418         }
4419 #endif /* DEV_NETMAP */
4420
4421         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4422
4423                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4424                         break;
4425
4426                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4427                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4428
4429                 cur = &rxr->rx_base[i];
4430                 status = cur->status;
4431                 mp = sendmp = NULL;
4432
4433                 if ((status & E1000_RXD_STAT_DD) == 0)
4434                         break;
4435
4436                 len = le16toh(cur->length);
4437                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4438
4439                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4440                     (rxr->discard == TRUE)) {
4441                         adapter->dropped_pkts++;
4442                         ++rxr->rx_discarded;
4443                         if (!eop) /* Catch subsequent segs */
4444                                 rxr->discard = TRUE;
4445                         else
4446                                 rxr->discard = FALSE;
4447                         em_rx_discard(rxr, i);
4448                         goto next_desc;
4449                 }
4450
4451                 /* Assign correct length to the current fragment */
4452                 mp = rxr->rx_buffers[i].m_head;
4453                 mp->m_len = len;
4454
4455                 /* Trigger for refresh */
4456                 rxr->rx_buffers[i].m_head = NULL;
4457
4458                 /* First segment? */
4459                 if (rxr->fmp == NULL) {
4460                         mp->m_pkthdr.len = len;
4461                         rxr->fmp = rxr->lmp = mp;
4462                 } else {
4463                         /* Chain mbuf's together */
4464                         mp->m_flags &= ~M_PKTHDR;
4465                         rxr->lmp->m_next = mp;
4466                         rxr->lmp = mp;
4467                         rxr->fmp->m_pkthdr.len += len;
4468                 }
4469
4470                 if (eop) {
4471                         --count;
4472                         sendmp = rxr->fmp;
4473                         sendmp->m_pkthdr.rcvif = ifp;
4474                         ifp->if_ipackets++;
4475                         em_receive_checksum(cur, sendmp);
4476 #ifndef __NO_STRICT_ALIGNMENT
4477                         if (adapter->max_frame_size >
4478                             (MCLBYTES - ETHER_ALIGN) &&
4479                             em_fixup_rx(rxr) != 0)
4480                                 goto skip;
4481 #endif
4482                         if (status & E1000_RXD_STAT_VP) {
4483                                 sendmp->m_pkthdr.ether_vtag =
4484                                     le16toh(cur->special);
4485                                 sendmp->m_flags |= M_VLANTAG;
4486                         }
4487 #ifndef __NO_STRICT_ALIGNMENT
4488 skip:
4489 #endif
4490                         rxr->fmp = rxr->lmp = NULL;
4491                 }
4492 next_desc:
4493                 /* Zero out the receive descriptors status. */
4494                 cur->status = 0;
4495                 ++rxdone;       /* cumulative for POLL */
4496                 ++processed;
4497
4498                 /* Advance our pointers to the next descriptor. */
4499                 if (++i == adapter->num_rx_desc)
4500                         i = 0;
4501
4502                 /* Send to the stack */
4503                 if (sendmp != NULL) {
4504                         rxr->next_to_check = i;
4505                         EM_RX_UNLOCK(rxr);
4506                         (*ifp->if_input)(ifp, sendmp);
4507                         EM_RX_LOCK(rxr);
4508                         i = rxr->next_to_check;
4509                 }
4510
4511                 /* Only refresh mbufs every 8 descriptors */
4512                 if (processed == 8) {
4513                         em_refresh_mbufs(rxr, i);
4514                         processed = 0;
4515                 }
4516         }
4517
4518         /* Catch any remaining refresh work */
4519         if (e1000_rx_unrefreshed(rxr))
4520                 em_refresh_mbufs(rxr, i);
4521
4522         rxr->next_to_check = i;
4523         if (done != NULL)
4524                 *done = rxdone;
4525         EM_RX_UNLOCK(rxr);
4526
4527         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4528 }
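/*
 * A typical invocation, as a sketch (the polling path passes its budget
 * in 'count'; the rx_process_limit name follows this driver's tunable,
 * though exact call sites vary):
 *
 *      int rx_done;
 *      bool more = em_rxeof(rxr, adapter->rx_process_limit, &rx_done);
 *
 * A negative count means "run until the ring is clean"; rx_done reports
 * how many descriptors were processed, for the poll-and-check case.
 */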
4529
4530 static __inline void
4531 em_rx_discard(struct rx_ring *rxr, int i)
4532 {
4533         struct em_buffer        *rbuf;
4534
4535         rbuf = &rxr->rx_buffers[i];
4536         /* Free any previous pieces */
4537         if (rxr->fmp != NULL) {
4538                 rxr->fmp->m_flags |= M_PKTHDR;
4539                 m_freem(rxr->fmp);
4540                 rxr->fmp = NULL;
4541                 rxr->lmp = NULL;
4542         }
4543         /*
4544         ** Free buffer and allow em_refresh_mbufs()
4545         ** to clean up and recharge buffer.
4546         */
4547         if (rbuf->m_head) {
4548                 m_free(rbuf->m_head);
4549                 rbuf->m_head = NULL;
4550         }
4551         return;
4552 }
4553
4554 #ifndef __NO_STRICT_ALIGNMENT
4555 /*
4556  * When jumbo frames are enabled we should realign the entire payload on
4557  * architectures with strict alignment; this is a serious design mistake
4558  * in the 8254x, as it nullifies the benefit of DMA. The 8254x only allows
4559  * RX buffer sizes of 2048/4096/8192/16384, whereas what we really want is
4560  * 2048 - ETHER_ALIGN so the payload lands aligned. On architectures without
4561  * strict alignment restrictions the 8254x still performs unaligned memory
4562  * accesses, which also reduces performance. To avoid copying an entire
4563  * frame to realign it, we allocate a new mbuf, copy the ethernet header
4564  * into it, and prepend the new mbuf to the existing chain.
4565  *
4566  * Be aware that the best performance of the 8254x is achieved only when
4567  * jumbo frames are not used at all on architectures with strict alignment.
4568  */
4569 static int
4570 em_fixup_rx(struct rx_ring *rxr)
4571 {
4572         struct adapter *adapter = rxr->adapter;
4573         struct mbuf *m, *n;
4574         int error;
4575
4576         error = 0;
4577         m = rxr->fmp;
4578         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4579                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4580                 m->m_data += ETHER_HDR_LEN;
4581         } else {
4582                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4583                 if (n != NULL) {
4584                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4585                         m->m_data += ETHER_HDR_LEN;
4586                         m->m_len -= ETHER_HDR_LEN;
4587                         n->m_len = ETHER_HDR_LEN;
4588                         M_MOVE_PKTHDR(n, m);
4589                         n->m_next = m;
4590                         rxr->fmp = n;
4591                 } else {
4592                         adapter->dropped_pkts++;
4593                         m_freem(rxr->fmp);
4594                         rxr->fmp = NULL;
4595                         error = ENOMEM;
4596                 }
4597         }
4598
4599         return (error);
4600 }
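/*
 * Worked example of the fixup above: ETHER_HDR_LEN is 14, so advancing
 * m_data by 14 shifts the payload by 14 mod 4 = 2 bytes. An IP header
 * that sat at offset 14 of an aligned buffer (2 bytes off a 4-byte
 * boundary) now begins 4-byte aligned, which is exactly what
 * strict-alignment architectures require.
 */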
4601 #endif
4602
4603 /*********************************************************************
4604  *
4605  *  Verify that the hardware indicated that the checksum is valid.
4606  *  Inform the stack about the status of checksum so that stack
4607  *  doesn't spend time verifying the checksum.
4608  *
4609  *********************************************************************/
4610 static void
4611 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4612 {
4613         /* The Ignore Checksum Indication bit is set: report nothing */
4614         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4615                 mp->m_pkthdr.csum_flags = 0;
4616                 return;
4617         }
4618
4619         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4620                 /* Did it pass? */
4621                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4622                         /* IP Checksum Good */
4623                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4624                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4625
4626                 } else {
4627                         mp->m_pkthdr.csum_flags = 0;
4628                 }
4629         }
4630
4631         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4632                 /* Did it pass? */
4633                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4634                         mp->m_pkthdr.csum_flags |=
4635                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4636                         mp->m_pkthdr.csum_data = htons(0xffff);
4637                 }
4638         }
4639 }
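/*
 * For reference, a sketch of how the stack interprets the flags set
 * above: CSUM_IP_CHECKED | CSUM_IP_VALID means the IP header checksum
 * was verified in hardware, and CSUM_DATA_VALID | CSUM_PSEUDO_HDR with
 * csum_data = 0xffff tells the TCP/UDP layer the payload checksum,
 * pseudo-header included, needs no further verification.
 */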
4640
4641 /*
4642  * This routine is run via a VLAN
4643  * config EVENT
4644  */
4645 static void
4646 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4647 {
4648         struct adapter  *adapter = ifp->if_softc;
4649         u32             index, bit;
4650
4651         if (ifp->if_softc !=  arg)   /* Not our event */
4652                 return;
4653
4654         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4655                 return;
4656
4657         EM_CORE_LOCK(adapter);
4658         index = (vtag >> 5) & 0x7F;
4659         bit = vtag & 0x1F;
4660         adapter->shadow_vfta[index] |= (1 << bit);
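        /*
         * Worked example: vtag 100 gives index = (100 >> 5) & 0x7F = 3
         * and bit = 100 & 0x1F = 4, i.e. bit 4 of shadow_vfta[3] -- one
         * bit per VLAN ID across the 128-word, 4096-bit filter table.
         */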
4661         ++adapter->num_vlans;
4662         /* Re-init to load the changes */
4663         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4664                 em_init_locked(adapter);
4665         EM_CORE_UNLOCK(adapter);
4666 }
4667
4668 /*
4669  * This routine is run via a VLAN
4670  * unconfig EVENT
4671  */
4672 static void
4673 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4674 {
4675         struct adapter  *adapter = ifp->if_softc;
4676         u32             index, bit;
4677
4678         if (ifp->if_softc !=  arg)
4679                 return;
4680
4681         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4682                 return;
4683
4684         EM_CORE_LOCK(adapter);
4685         index = (vtag >> 5) & 0x7F;
4686         bit = vtag & 0x1F;
4687         adapter->shadow_vfta[index] &= ~(1 << bit);
4688         --adapter->num_vlans;
4689         /* Re-init to load the changes */
4690         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4691                 em_init_locked(adapter);
4692         EM_CORE_UNLOCK(adapter);
4693 }
4694
4695 static void
4696 em_setup_vlan_hw_support(struct adapter *adapter)
4697 {
4698         struct e1000_hw *hw = &adapter->hw;
4699         u32             reg;
4700
4701         /*
4702         ** We get here through init_locked, meaning
4703         ** a soft reset; that has already cleared
4704         ** the VFTA and other state, so if no VLANs
4705         ** have been registered there is nothing to do.
4706         */
4707         if (adapter->num_vlans == 0)
4708                 return;
4709
4710         /*
4711         ** A soft reset zeroes out the VFTA, so
4712         ** we need to repopulate it now.
4713         */
4714         for (int i = 0; i < EM_VFTA_SIZE; i++)
4715                 if (adapter->shadow_vfta[i] != 0)
4716                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4717                             i, adapter->shadow_vfta[i]);
4718
4719         reg = E1000_READ_REG(hw, E1000_CTRL);
4720         reg |= E1000_CTRL_VME;
4721         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4722
4723         /* Enable the Filter Table */
4724         reg = E1000_READ_REG(hw, E1000_RCTL);
4725         reg &= ~E1000_RCTL_CFIEN;
4726         reg |= E1000_RCTL_VFE;
4727         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4728 }
4729
4730 static void
4731 em_enable_intr(struct adapter *adapter)
4732 {
4733         struct e1000_hw *hw = &adapter->hw;
4734         u32 ims_mask = IMS_ENABLE_MASK;
4735
4736         if (hw->mac.type == e1000_82574) {
4737                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4738                 ims_mask |= EM_MSIX_MASK;
4739         } 
4740         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4741 }
4742
4743 static void
4744 em_disable_intr(struct adapter *adapter)
4745 {
4746         struct e1000_hw *hw = &adapter->hw;
4747
4748         if (hw->mac.type == e1000_82574)
4749                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4750         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4751 }
4752
4753 /*
4754  * Bit of a misnomer: what this really means is
4755  * to enable OS management of the system, i.e.
4756  * to disable certain special hardware management features.
4757  */
4758 static void
4759 em_init_manageability(struct adapter *adapter)
4760 {
4761         /* A shared code workaround */
4762 #define E1000_82542_MANC2H E1000_MANC2H
4763         if (adapter->has_manage) {
4764                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4765                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4766
4767                 /* disable hardware interception of ARP */
4768                 manc &= ~(E1000_MANC_ARP_EN);
4769
4770                 /* enable receiving management packets to the host */
4771                 manc |= E1000_MANC_EN_MNG2HOST;
4772 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4773 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4774                 manc2h |= E1000_MNG2HOST_PORT_623;
4775                 manc2h |= E1000_MNG2HOST_PORT_664;
4776                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4777                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4778         }
4779 }
4780
4781 /*
4782  * Give control back to hardware management
4783  * controller if there is one.
4784  */
4785 static void
4786 em_release_manageability(struct adapter *adapter)
4787 {
4788         if (adapter->has_manage) {
4789                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4790
4791                 /* re-enable hardware interception of ARP */
4792                 manc |= E1000_MANC_ARP_EN;
4793                 manc &= ~E1000_MANC_EN_MNG2HOST;
4794
4795                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4796         }
4797 }
4798
4799 /*
4800  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4801  * For ASF and Pass Through versions of f/w this means
4802  * that the driver is loaded. For AMT versions of the f/w
4803  * this means that the network i/f is open.
4804  */
4805 static void
4806 em_get_hw_control(struct adapter *adapter)
4807 {
4808         u32 ctrl_ext, swsm;
4809
4810         if (adapter->hw.mac.type == e1000_82573) {
4811                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4812                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4813                     swsm | E1000_SWSM_DRV_LOAD);
4814                 return;
4815         }
4816         /* else */
4817         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4818         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4819             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4820         return;
4821 }
4822
4823 /*
4824  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4825  * For ASF and Pass Through versions of f/w this means that
4826  * the driver is no longer loaded. For AMT versions of the
4827  * f/w this means that the network i/f is closed.
4828  */
4829 static void
4830 em_release_hw_control(struct adapter *adapter)
4831 {
4832         u32 ctrl_ext, swsm;
4833
4834         if (!adapter->has_manage)
4835                 return;
4836
4837         if (adapter->hw.mac.type == e1000_82573) {
4838                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4839                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4840                     swsm & ~E1000_SWSM_DRV_LOAD);
4841                 return;
4842         }
4843         /* else */
4844         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4845         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4846             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4847         return;
4848 }
4849
4850 static int
4851 em_is_valid_ether_addr(u8 *addr)
4852 {
4853         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4854
4855         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4856                 return (FALSE);
4857         }
4858
4859         return (TRUE);
4860 }
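/*
 * The (addr[0] & 1) test above rejects any address with the IEEE
 * group/multicast bit set: 01:00:5e:00:00:01, for example, fails it,
 * while a typical unicast MAC such as 00:a0:c9:12:34:56 passes.
 */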
4861
4862 /*
4863 ** Parse the interface capabilities with regard
4864 ** to both system management and wake-on-lan for
4865 ** later use.
4866 */
4867 static void
4868 em_get_wakeup(device_t dev)
4869 {
4870         struct adapter  *adapter = device_get_softc(dev);
4871         u16             eeprom_data = 0, device_id, apme_mask;
4872
4873         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4874         apme_mask = EM_EEPROM_APME;
4875
4876         switch (adapter->hw.mac.type) {
4877         case e1000_82573:
4878         case e1000_82583:
4879                 adapter->has_amt = TRUE;
4880                 /* FALLTHROUGH */
4881         case e1000_82571:
4882         case e1000_82572:
4883         case e1000_80003es2lan:
4884                 if (adapter->hw.bus.func == 1) {
4885                         e1000_read_nvm(&adapter->hw,
4886                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4887                         break;
4888                 } else
4889                         e1000_read_nvm(&adapter->hw,
4890                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4891                 break;
4892         case e1000_ich8lan:
4893         case e1000_ich9lan:
4894         case e1000_ich10lan:
4895         case e1000_pchlan:
4896         case e1000_pch2lan:
4897                 apme_mask = E1000_WUC_APME;
4898                 adapter->has_amt = TRUE;
4899                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4900                 break;
4901         default:
4902                 e1000_read_nvm(&adapter->hw,
4903                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4904                 break;
4905         }
4906         if (eeprom_data & apme_mask)
4907                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4908         /*
4909          * We have the eeprom settings; now apply the special cases
4910          * where the eeprom may be wrong or the board doesn't support
4911          * wake on lan on a particular port.
4912          */
4913         device_id = pci_get_device(dev);
4914         switch (device_id) {
4915         case E1000_DEV_ID_82571EB_FIBER:
4916                 /* Wake events only supported on port A for dual fiber
4917                  * regardless of eeprom setting */
4918                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4919                     E1000_STATUS_FUNC_1)
4920                         adapter->wol = 0;
4921                 break;
4922         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4923         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4924         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4925                 /* if quad port adapter, disable WoL on all but port A */
4926                 if (global_quad_port_a != 0)
4927                         adapter->wol = 0;
4928                 /* Reset for multiple quad port adapters */
4929                 if (++global_quad_port_a == 4)
4930                         global_quad_port_a = 0;
4931                 break;
4932         }
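        /*
         * Worked example of the quad-port logic above: on a single
         * quad-port 82571EB, port A probes first with global_quad_port_a
         * at 0 and keeps its WoL setting; ports B through D see a nonzero
         * count and have wol cleared; the fourth probe wraps the counter
         * back to 0 for any further quad-port adapter.
         */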
4933         return;
4934 }
4935
4936
4937 /*
4938  * Enable PCI Wake On Lan capability
4939  */
4940 static void
4941 em_enable_wakeup(device_t dev)
4942 {
4943         struct adapter  *adapter = device_get_softc(dev);
4944         struct ifnet    *ifp = adapter->ifp;
4945         u32             pmc, ctrl, ctrl_ext, rctl;
4946         u16             status;
4947
4948         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4949                 return;
4950
4951         /* Advertise the wakeup capability */
4952         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4953         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4954         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4955         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4956
4957         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4958             (adapter->hw.mac.type == e1000_pchlan) ||
4959             (adapter->hw.mac.type == e1000_ich9lan) ||
4960             (adapter->hw.mac.type == e1000_ich10lan))
4961                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4962
4963         /* Keep the laser running on Fiber adapters */
4964         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4965             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4966                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4967                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4968                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4969         }
4970
4971         /*
4972         ** Determine type of Wakeup: note that wol
4973         ** is set with all bits on by default.
4974         */
4975         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4976                 adapter->wol &= ~E1000_WUFC_MAG;
4977
4978         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4979                 adapter->wol &= ~E1000_WUFC_MC;
4980         else {
4981                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4982                 rctl |= E1000_RCTL_MPE;
4983                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4984         }
4985
4986         if ((adapter->hw.mac.type == e1000_pchlan) ||
4987             (adapter->hw.mac.type == e1000_pch2lan)) {
4988                 if (em_enable_phy_wakeup(adapter))
4989                         return;
4990         } else {
4991                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4992                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4993         }
4994
4995         if (adapter->hw.phy.type == e1000_phy_igp_3)
4996                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4997
4998         /* Request PME */
4999         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5000         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5001         if (ifp->if_capenable & IFCAP_WOL)
5002                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5003         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5004
5005         return;
5006 }
5007
5008 /*
5009 ** WOL in the newer chipset interfaces (pchlan)
5010 ** requires these settings to be copied into the phy
5011 */
5012 static int
5013 em_enable_phy_wakeup(struct adapter *adapter)
5014 {
5015         struct e1000_hw *hw = &adapter->hw;
5016         u32 mreg, ret = 0;
5017         u16 preg;
5018
5019         /* copy MAC RARs to PHY RARs */
5020         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5021
5022         /* copy MAC MTA to PHY MTA */
5023         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5024                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5025                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5026                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5027                     (u16)((mreg >> 16) & 0xFFFF));
5028         }
5029
5030         /* configure PHY Rx Control register */
5031         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5032         mreg = E1000_READ_REG(hw, E1000_RCTL);
5033         if (mreg & E1000_RCTL_UPE)
5034                 preg |= BM_RCTL_UPE;
5035         if (mreg & E1000_RCTL_MPE)
5036                 preg |= BM_RCTL_MPE;
5037         preg &= ~(BM_RCTL_MO_MASK);
5038         if (mreg & E1000_RCTL_MO_3)
5039                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5040                                 << BM_RCTL_MO_SHIFT);
5041         if (mreg & E1000_RCTL_BAM)
5042                 preg |= BM_RCTL_BAM;
5043         if (mreg & E1000_RCTL_PMCF)
5044                 preg |= BM_RCTL_PMCF;
5045         mreg = E1000_READ_REG(hw, E1000_CTRL);
5046         if (mreg & E1000_CTRL_RFCE)
5047                 preg |= BM_RCTL_RFCE;
5048         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5049
5050         /* enable PHY wakeup in MAC register */
5051         E1000_WRITE_REG(hw, E1000_WUC,
5052             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5053         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5054
5055         /* configure and enable PHY wakeup in PHY registers */
5056         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5057         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5058
5059         /* activate PHY wakeup */
5060         ret = hw->phy.ops.acquire(hw);
5061         if (ret) {
5062                 printf("Could not acquire PHY\n");
5063                 return (ret);
5064         }
5065         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5066                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5067         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5068         if (ret) {
5069                 printf("Could not read PHY page 769\n");
5070                 goto out;
5071         }
5072         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5073         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5074         if (ret)
5075                 printf("Could not set PHY Host Wakeup bit\n");
5076 out:
5077         hw->phy.ops.release(hw);
5078
5079         return (ret);
5080 }
5081
5082 static void
5083 em_led_func(void *arg, int onoff)
5084 {
5085         struct adapter  *adapter = arg;
5086  
5087         EM_CORE_LOCK(adapter);
5088         if (onoff) {
5089                 e1000_setup_led(&adapter->hw);
5090                 e1000_led_on(&adapter->hw);
5091         } else {
5092                 e1000_led_off(&adapter->hw);
5093                 e1000_cleanup_led(&adapter->hw);
5094         }
5095         EM_CORE_UNLOCK(adapter);
5096 }
5097
5098 /*
5099 ** Disable the L0s and L1 PCIe link states
5100 */
5101 static void
5102 em_disable_aspm(struct adapter *adapter)
5103 {
5104         int             base, reg;
5105         u16             link_cap, link_ctrl;
5106         device_t        dev = adapter->dev;
5107
5108         switch (adapter->hw.mac.type) {
5109                 case e1000_82573:
5110                 case e1000_82574:
5111                 case e1000_82583:
5112                         break;
5113                 default:
5114                         return;
5115         }
5116         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5117                 return;
5118         reg = base + PCIER_LINK_CAP;
5119         link_cap = pci_read_config(dev, reg, 2);
5120         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5121                 return;
5122         reg = base + PCIER_LINK_CTL;
5123         link_ctrl = pci_read_config(dev, reg, 2);
5124         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5125         pci_write_config(dev, reg, link_ctrl, 2);
5126         return;
5127 }
5128
5129 /**********************************************************************
5130  *
5131  *  Update the board statistics counters.
5132  *
5133  **********************************************************************/
5134 static void
5135 em_update_stats_counters(struct adapter *adapter)
5136 {
5137         struct ifnet   *ifp;
5138
5139         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5140            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5141                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5142                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5143         }
5144         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5145         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5146         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5147         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5148
5149         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5150         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5151         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5152         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5153         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5154         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5155         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5156         /*
5157         ** For watchdog management we need to know if we have been
5158         ** paused during the last interval, so capture that here.
5159         */
5160         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5161         adapter->stats.xoffrxc += adapter->pause_frames;
5162         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5163         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5164         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5165         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5166         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5167         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5168         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5169         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5170         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5171         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5172         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5173         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5174
5175         /* For the 64-bit byte counters the low dword must be read first. */
5176         /* Both registers clear on the read of the high dword */
5177
5178         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5179             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5180         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5181             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5182
5183         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5184         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5185         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5186         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5187         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5188
5189         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5190         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5191
5192         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5193         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5194         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5195         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5196         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5197         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5198         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5199         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5200         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5201         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5202
5203         /* Interrupt Counts */
5204
5205         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5206         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5207         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5208         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5209         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5210         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5211         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5212         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5213         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5214
5215         if (adapter->hw.mac.type >= e1000_82543) {
5216                 adapter->stats.algnerrc +=
5217                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5218                 adapter->stats.rxerrc +=
5219                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5220                 adapter->stats.tncrs +=
5221                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5222                 adapter->stats.cexterr +=
5223                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5224                 adapter->stats.tsctc +=
5225                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5226                 adapter->stats.tsctfc +=
5227                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5228         }
5229         ifp = adapter->ifp;
5230
5231         ifp->if_collisions = adapter->stats.colc;
5232
5233         /* Rx Errors */
5234         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5235             adapter->stats.crcerrs + adapter->stats.algnerrc +
5236             adapter->stats.ruc + adapter->stats.roc +
5237             adapter->stats.mpc + adapter->stats.cexterr;
5238
5239         /* Tx Errors */
5240         ifp->if_oerrors = adapter->stats.ecol +
5241             adapter->stats.latecol + adapter->watchdog_events;
5242 }
5243
5244 /* Export a single 32-bit register via a read-only sysctl. */
5245 static int
5246 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5247 {
5248         struct adapter *adapter;
5249         u_int val;
5250
5251         adapter = oidp->oid_arg1;
5252         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5253         return (sysctl_handle_int(oidp, &val, 0, req));
5254 }
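/*
 * Usage sketch (unit number illustrative): the "device_control" node
 * added below wires this handler to E1000_CTRL, so
 *
 *      sysctl dev.em.0.device_control
 *
 * performs a live register read each time it is queried.
 */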
5255
5256 /*
5257  * Add sysctl variables, one per statistic, to the system.
5258  */
5259 static void
5260 em_add_hw_stats(struct adapter *adapter)
5261 {
5262         device_t dev = adapter->dev;
5263
5264         struct tx_ring *txr = adapter->tx_rings;
5265         struct rx_ring *rxr = adapter->rx_rings;
5266
5267         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5268         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5269         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5270         struct e1000_hw_stats *stats = &adapter->stats;
5271
5272         struct sysctl_oid *stat_node, *queue_node, *int_node;
5273         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5274
5275 #define QUEUE_NAME_LEN 32
5276         char namebuf[QUEUE_NAME_LEN];
5277         
5278         /* Driver Statistics */
5279         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5280                         CTLFLAG_RD, &adapter->link_irq,
5281                         "Link MSIX IRQ Handled");
5282         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5283                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5284                          "Std mbuf failed");
5285         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5286                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5287                          "Std mbuf cluster failed");
5288         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5289                         CTLFLAG_RD, &adapter->dropped_pkts,
5290                         "Driver dropped packets");
5291         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5292                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5293                         "Driver tx dma failure in xmit");
5294         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5295                         CTLFLAG_RD, &adapter->rx_overruns,
5296                         "RX overruns");
5297         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5298                         CTLFLAG_RD, &adapter->watchdog_events,
5299                         "Watchdog timeouts");
5300         
5301         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5302                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5303                         em_sysctl_reg_handler, "IU",
5304                         "Device Control Register");
5305         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5306                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5307                         em_sysctl_reg_handler, "IU",
5308                         "Receiver Control Register");
5309         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5310                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5311                         "Flow Control High Watermark");
5312         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5313                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5314                         "Flow Control Low Watermark");
5315
5316         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5317                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5318                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5319                                             CTLFLAG_RD, NULL, "Queue Name");
5320                 queue_list = SYSCTL_CHILDREN(queue_node);
5321
5322                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5323                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5324                                 E1000_TDH(txr->me),
5325                                 em_sysctl_reg_handler, "IU",
5326                                 "Transmit Descriptor Head");
5327                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5328                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5329                                 E1000_TDT(txr->me),
5330                                 em_sysctl_reg_handler, "IU",
5331                                 "Transmit Descriptor Tail");
5332                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5333                                 CTLFLAG_RD, &txr->tx_irq,
5334                                 "Queue MSI-X Transmit Interrupts");
5335                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5336                                 CTLFLAG_RD, &txr->no_desc_avail,
5337                                 "Queue No Descriptor Available");
5338                 
5339                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5340                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5341                                 E1000_RDH(rxr->me),
5342                                 em_sysctl_reg_handler, "IU",
5343                                 "Receive Descriptor Head");
5344                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5345                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5346                                 E1000_RDT(rxr->me),
5347                                 em_sysctl_reg_handler, "IU",
5348                                 "Receive Descriptor Tail");
5349                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5350                                 CTLFLAG_RD, &rxr->rx_irq,
5351                                 "Queue MSI-X Receive Interrupts");
5352         }
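        /*
         * The loop above yields one sub-tree per queue; for unit 0,
         * queue 0 (illustrative paths), the nodes appear as
         * dev.em.0.queue0.txd_head, dev.em.0.queue0.rxd_tail, and so on.
         */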
5353
5354         /* MAC stats get their own sub node */
5355
5356         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5357                                     CTLFLAG_RD, NULL, "Statistics");
5358         stat_list = SYSCTL_CHILDREN(stat_node);
5359
5360         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5361                         CTLFLAG_RD, &stats->ecol,
5362                         "Excessive collisions");
5363         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5364                         CTLFLAG_RD, &stats->scc,
5365                         "Single collisions");
5366         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5367                         CTLFLAG_RD, &stats->mcc,
5368                         "Multiple collisions");
5369         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5370                         CTLFLAG_RD, &stats->latecol,
5371                         "Late collisions");
5372         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5373                         CTLFLAG_RD, &stats->colc,
5374                         "Collision Count");
5375         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5376                         CTLFLAG_RD, &adapter->stats.symerrs,
5377                         "Symbol Errors");
5378         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5379                         CTLFLAG_RD, &adapter->stats.sec,
5380                         "Sequence Errors");
5381         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5382                         CTLFLAG_RD, &adapter->stats.dc,
5383                         "Defer Count");
5384         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5385                         CTLFLAG_RD, &adapter->stats.mpc,
5386                         "Missed Packets");
5387         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5388                         CTLFLAG_RD, &adapter->stats.rnbc,
5389                         "Receive No Buffers");
5390         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5391                         CTLFLAG_RD, &adapter->stats.ruc,
5392                         "Receive Undersize");
5393         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5394                         CTLFLAG_RD, &adapter->stats.rfc,
5395                         "Fragmented Packets Received");
5396         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5397                         CTLFLAG_RD, &adapter->stats.roc,
5398                         "Oversized Packets Received");
5399         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5400                         CTLFLAG_RD, &adapter->stats.rjc,
5401                         "Received Jabber");
5402         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5403                         CTLFLAG_RD, &adapter->stats.rxerrc,
5404                         "Receive Errors");
5405         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5406                         CTLFLAG_RD, &adapter->stats.crcerrs,
5407                         "CRC errors");
5408         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5409                         CTLFLAG_RD, &adapter->stats.algnerrc,
5410                         "Alignment Errors");
5411         /* On 82575 these are collision counts */
5412         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5413                         CTLFLAG_RD, &adapter->stats.cexterr,
5414                         "Collision/Carrier extension errors");
5415         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5416                         CTLFLAG_RD, &adapter->stats.xonrxc,
5417                         "XON Received");
5418         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5419                         CTLFLAG_RD, &adapter->stats.xontxc,
5420                         "XON Transmitted");
5421         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5422                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5423                         "XOFF Received");
5424         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5425                         CTLFLAG_RD, &adapter->stats.xofftxc,
5426                         "XOFF Transmitted");
5427
5428         /* Packet Reception Stats */
5429         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5430                         CTLFLAG_RD, &adapter->stats.tpr,
5431                         "Total Packets Received");
5432         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5433                         CTLFLAG_RD, &adapter->stats.gprc,
5434                         "Good Packets Received");
5435         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5436                         CTLFLAG_RD, &adapter->stats.bprc,
5437                         "Broadcast Packets Received");
5438         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5439                         CTLFLAG_RD, &adapter->stats.mprc,
5440                         "Multicast Packets Received");
5441         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5442                         CTLFLAG_RD, &adapter->stats.prc64,
5443                         "64 byte frames received");
5444         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5445                         CTLFLAG_RD, &adapter->stats.prc127,
5446                         "65-127 byte frames received");
5447         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5448                         CTLFLAG_RD, &adapter->stats.prc255,
5449                         "128-255 byte frames received");
5450         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5451                         CTLFLAG_RD, &adapter->stats.prc511,
5452                         "256-511 byte frames received");
5453         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5454                         CTLFLAG_RD, &adapter->stats.prc1023,
5455                         "512-1023 byte frames received");
5456         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5457                         CTLFLAG_RD, &adapter->stats.prc1522,
5458                         "1024-1522 byte frames received");
5459         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5460                         CTLFLAG_RD, &adapter->stats.gorc, 
5461                         "Good Octets Received"); 
5462
5463         /* Packet Transmission Stats */
5464         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5465                         CTLFLAG_RD, &adapter->stats.gotc, 
5466                         "Good Octets Transmitted"); 
5467         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5468                         CTLFLAG_RD, &adapter->stats.tpt,
5469                         "Total Packets Transmitted");
5470         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5471                         CTLFLAG_RD, &adapter->stats.gptc,
5472                         "Good Packets Transmitted");
5473         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5474                         CTLFLAG_RD, &adapter->stats.bptc,
5475                         "Broadcast Packets Transmitted");
5476         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5477                         CTLFLAG_RD, &adapter->stats.mptc,
5478                         "Multicast Packets Transmitted");
5479         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5480                         CTLFLAG_RD, &adapter->stats.ptc64,
5481                         "64 byte frames transmitted");
5482         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5483                         CTLFLAG_RD, &adapter->stats.ptc127,
5484                         "65-127 byte frames transmitted");
5485         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5486                         CTLFLAG_RD, &adapter->stats.ptc255,
5487                         "128-255 byte frames transmitted");
5488         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5489                         CTLFLAG_RD, &adapter->stats.ptc511,
5490                         "256-511 byte frames transmitted");
5491         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5492                         CTLFLAG_RD, &adapter->stats.ptc1023,
5493                         "512-1023 byte frames transmitted");
5494         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5495                         CTLFLAG_RD, &adapter->stats.ptc1522,
5496                         "1024-1522 byte frames transmitted");
5497         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5498                         CTLFLAG_RD, &adapter->stats.tsctc,
5499                         "TSO Contexts Transmitted");
5500         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5501                         CTLFLAG_RD, &adapter->stats.tsctfc,
5502                         "TSO Contexts Failed");
5503
5504
5505         /* Interrupt Stats */
5506
5507         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5508                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5509         int_list = SYSCTL_CHILDREN(int_node);
5510
5511         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5512                         CTLFLAG_RD, &adapter->stats.iac,
5513                         "Interrupt Assertion Count");
5514
5515         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5516                         CTLFLAG_RD, &adapter->stats.icrxptc,
5517                         "Interrupt Cause Rx Pkt Timer Expire Count");
5518
5519         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5520                         CTLFLAG_RD, &adapter->stats.icrxatc,
5521                         "Interrupt Cause Rx Abs Timer Expire Count");
5522
5523         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5524                         CTLFLAG_RD, &adapter->stats.ictxptc,
5525                         "Interrupt Cause Tx Pkt Timer Expire Count");
5526
5527         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5528                         CTLFLAG_RD, &adapter->stats.ictxatc,
5529                         "Interrupt Cause Tx Abs Timer Expire Count");
5530
5531         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5532                         CTLFLAG_RD, &adapter->stats.ictxqec,
5533                         "Interrupt Cause Tx Queue Empty Count");
5534
5535         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5536                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5537                         "Interrupt Cause Tx Queue Min Thresh Count");
5538
5539         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5540                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5541                         "Interrupt Cause Rx Desc Min Thresh Count");
5542
5543         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5544                         CTLFLAG_RD, &adapter->stats.icrxoc,
5545                         "Interrupt Cause Receiver Overrun Count");
5546 }
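
/*
 * Sketch of how the OIDs created above surface at run time; the
 * unit number and sysctl paths are assumed here for illustration:
 *
 *   sysctl dev.em.0.queue0.txd_head
 *   sysctl dev.em.0.mac_stats.excess_coll
 *   sysctl dev.em.0.interrupts.asserts
 */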
5547
5548 /**********************************************************************
5549  *
5550  *  This routine provides a way to dump out the adapter EEPROM,
5551  *  often a useful debug/service tool. Only the first 32 words
5552  *  are dumped; the fields of interest fall within that range.
5553  *
5554  **********************************************************************/
5555 static int
5556 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5557 {
5558         struct adapter *adapter = (struct adapter *)arg1;
5559         int error;
5560         int result;
5561
5562         result = -1;
5563         error = sysctl_handle_int(oidp, &result, 0, req);
5564
5565         if (error || !req->newptr)
5566                 return (error);
5567
5568         /*
5569          * Writing a value of 1 triggers a hex dump
5570          * of the first 32 16-bit words of the EEPROM
5571          * to the console.
5572          */
5573         if (result == 1)
5574                 em_print_nvm_info(adapter);
5575
5576         return (error);
5577 }
5578
5579 static void
5580 em_print_nvm_info(struct adapter *adapter)
5581 {
5582         u16     eeprom_data;
5583         int     i, j, row = 0;
5584
5585         /* It's a bit crude, but it gets the job done */
5586         printf("\nInterface EEPROM Dump:\n");
5587         printf("Offset\n0x0000  ");
5588         for (i = 0, j = 0; i < 32; i++, j++) {
5589                 if (j == 8) { /* Make the offset block */
5590                         j = 0; ++row;
5591                         printf("\n0x00%x0  ", row);
5592                 }
5593                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5594                 printf("%04x ", eeprom_data);
5595         }
5596         printf("\n");
5597 }
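
/*
 * The dump is reached by writing 1 to the NVM debug OID created at
 * attach time; path and output below are illustrative only (the
 * word values are placeholders, not real EEPROM contents):
 *
 *   sysctl dev.em.0.nvm=1
 *
 *   Interface EEPROM Dump:
 *   Offset
 *   0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *   0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *   ...
 */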
5598
5599 static int
5600 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5601 {
5602         struct em_int_delay_info *info;
5603         struct adapter *adapter;
5604         u32 regval;
5605         int error, usecs, ticks;
5606
5607         info = (struct em_int_delay_info *)arg1;
5608         usecs = info->value;
5609         error = sysctl_handle_int(oidp, &usecs, 0, req);
5610         if (error != 0 || req->newptr == NULL)
5611                 return (error);
5612         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5613                 return (EINVAL);
5614         info->value = usecs;
5615         ticks = EM_USECS_TO_TICKS(usecs);
5616
5617         adapter = info->adapter;
5618         
5619         EM_CORE_LOCK(adapter);
5620         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5621         regval = (regval & ~0xffff) | (ticks & 0xffff);
5622         /* Handle a few special cases. */
5623         switch (info->offset) {
5624         case E1000_RDTR:
5625                 break;
5626         case E1000_TIDV:
5627                 if (ticks == 0) {
5628                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5629                         /* Don't write 0 into the TIDV register. */
5630                         regval++;
5631                 } else
5632                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5633                 break;
5634         }
5635         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5636         EM_CORE_UNLOCK(adapter);
5637         return (0);
5638 }
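
/*
 * Sketch of the units involved, assuming the EM_USECS_TO_TICKS()
 * definition in if_em.h where one delay-timer tick is 1.024 usec:
 *
 *   usecs = 100  ->  ticks = (1000 * 100 + 512) / 1024 = 98
 *
 * Only the low 16 bits of the delay register hold the timer value,
 * hence the read-modify-write with the 0xffff mask above.
 */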
5639
5640 static void
5641 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5642         const char *description, struct em_int_delay_info *info,
5643         int offset, int value)
5644 {
5645         info->adapter = adapter;
5646         info->offset = offset;
5647         info->value = value;
5648         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5649             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5650             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5651             info, 0, em_sysctl_int_delay, "I", description);
5652 }
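
/*
 * Illustrative attach-time use (argument values are examples only):
 *
 *   em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *       "receive interrupt delay in usecs",
 *       &adapter->rx_int_delay,
 *       E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *       em_rx_int_delay_dflt);
 */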
5653
5654 static void
5655 em_set_sysctl_value(struct adapter *adapter, const char *name,
5656         const char *description, int *limit, int value)
5657 {
5658         *limit = value;
5659         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5660             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5661             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5662 }
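
/*
 * Illustrative attach-time use (name and value are examples only):
 *
 *   em_set_sysctl_value(adapter, "rx_processing_limit",
 *       "max number of rx packets to process",
 *       &adapter->rx_process_limit, 100);
 */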
5663
5664
5665 /*
5666 ** Set flow control using sysctl:
5667 ** Flow control values:
5668 **      0 - off
5669 **      1 - rx pause
5670 **      2 - tx pause
5671 **      3 - full
5672 */
5673 static int
5674 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5675 {
5676         struct adapter  *adapter = (struct adapter *) arg1;
5677         int             error, input;
5678
5679         input = adapter->fc; /* seed with the current setting */
5680         error = sysctl_handle_int(oidp, &input, 0, req);
5681
5682         if ((error) || (req->newptr == NULL))
5683                 return (error);
5684
5685         if (input == adapter->fc) /* no change? */
5686                 return (error);
5687
5688         switch (input) {
5689                 case e1000_fc_rx_pause:
5690                 case e1000_fc_tx_pause:
5691                 case e1000_fc_full:
5692                 case e1000_fc_none:
5693                         adapter->hw.fc.requested_mode = input;
5694                         adapter->fc = input;
5695                         break;
5696                 default:
5697                         /* Do nothing */
5698                         return (error);
5699         }
5700
5701         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5702         e1000_force_mac_fc(&adapter->hw);
5703         return (error);
5704 }
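
/*
 * Example use from the command line; the unit number and OID path
 * are assumed for illustration, and the values map to the
 * e1000_fc_* enum handled above:
 *
 *   sysctl dev.em.0.fc=3    # request full flow control
 *   sysctl dev.em.0.fc=0    # disable flow control
 */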
5705
5706 /*
5707 ** Manage Energy Efficient Ethernet:
5708 ** Control values (tracks eee_disable):
5709 **     0 - EEE enabled, 1 - EEE disabled
5710 */
5711 static int
5712 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5713 {
5714         struct adapter *adapter = (struct adapter *) arg1;
5715         int             error, value;
5716
5717         value = adapter->hw.dev_spec.ich8lan.eee_disable;
5718         error = sysctl_handle_int(oidp, &value, 0, req);
5719         if (error || req->newptr == NULL)
5720                 return (error);
5721         EM_CORE_LOCK(adapter);
5722         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5723         em_init_locked(adapter);
5724         EM_CORE_UNLOCK(adapter);
5725         return (0);
5726 }
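
/*
 * Example use (OID path shown for illustration only). The value
 * tracks eee_disable, so writing 1 turns EEE off and re-inits the
 * interface:
 *
 *   sysctl dev.em.0.eee_control=1
 */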
5727
5728 static int
5729 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5730 {
5731         struct adapter *adapter;
5732         int error;
5733         int result;
5734
5735         result = -1;
5736         error = sysctl_handle_int(oidp, &result, 0, req);
5737
5738         if (error || !req->newptr)
5739                 return (error);
5740
5741         if (result == 1) {
5742                 adapter = (struct adapter *)arg1;
5743                 em_print_debug_info(adapter);
5744         }
5745
5746         return (error);
5747 }
5748
5749 /*
5750 ** This routine is meant to be fluid, add whatever is
5751 ** needed for debugging a problem.  -jfv
5752 */
5753 static void
5754 em_print_debug_info(struct adapter *adapter)
5755 {
5756         device_t dev = adapter->dev;
5757         struct tx_ring *txr = adapter->tx_rings;
5758         struct rx_ring *rxr = adapter->rx_rings;
5759
5760         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5761                 printf("Interface is RUNNING ");
5762         else
5763                 printf("Interface is NOT RUNNING ");
5764
5765         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5766                 printf("and INACTIVE\n");
5767         else
5768                 printf("and ACTIVE\n");
5769
5770         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5771             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5772             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5773         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5774             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5775             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5776         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5777         device_printf(dev, "TX descriptors avail = %d\n",
5778             txr->tx_avail);
5779         device_printf(dev, "TX descriptor avail failures = %lu\n",
5780             txr->no_desc_avail);
5781         device_printf(dev, "RX discarded packets = %lu\n",
5782             rxr->rx_discarded);
5783         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5784         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5785 }
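
/*
 * The dump above is reached by writing 1 to the debug OID created
 * at attach time, e.g. (path shown for illustration):
 *
 *   sysctl dev.em.0.debug=1
 */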